/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct socket *tcp_socket;

void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   int tcplen);
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet_csk_bind_conflict);
}

static void tcp_v4_hash(struct sock *sk)
{
	inet_hash(&tcp_hashinfo, sk);
}

void tcp_unhash(struct sock *sk)
{
	inet_unhash(&tcp_hashinfo, sk);
}

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only the timestamp cache is
	   held not per host, but per port pair, and the TW bucket is used as
	   state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);
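/*
 * A worked instance of the reuse rule above (illustrative numbers, not
 * taken from a trace): suppose the old incarnation left TIME-WAIT with
 * tw_snd_nxt == 1000.  A permitted reuse then starts at
 *
 *	write_seq = 1000 + 65535 + 2 = 66537
 *
 * i.e. beyond any window the peer could still honour for the old
 * connection, while the inherited ts_recent lets PAWS reject stray old
 * segments.  The "> 1" age test works because tw_ts_recent_stamp has
 * one-second resolution: it guarantees at least one timestamp clock
 * tick separates the two incarnations.
 */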
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying a new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember the soft error
	 * for the case that this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
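/*
 * A rough sketch of what tcp_sync_mss() derives from the new path MTU
 * (the exact bookkeeping lives elsewhere; this is illustrative only):
 *
 *	mss = mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
 *	      - icsk->icsk_ext_hdr_len	(IP options, if any)
 *
 * so once icsk_pmtu_cookie exceeds the reported MTU, the cached mss
 * shrinks immediately and the retransmit above already fits the path.
 */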
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
			 th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen normally;
			       it can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
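/*
 * For reference, the hard-error case above worked through: an incoming
 * ICMP_DEST_UNREACH with code ICMP_PORT_UNREACH converts via
 * icmp_err_convert[] to ECONNREFUSED, which is the errno a connect()ing
 * application ultimately sees.
 */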
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(th, len,
					  inet->saddr, inet->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = skb->nh.iph;
	th = skb->h.th;

	th->check = 0;
	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
	skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}
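/*
 * The checksum both routines above compute covers a pseudo-header in
 * addition to the TCP segment itself (RFC 793, folded per RFC 1071):
 *
 *	sum   = saddr + daddr + htons(IPPROTO_TCP) + htons(len)
 *		+ csum(TCP header and data)
 *	check = ~fold_to_16bits(sum)
 *
 * In the CHECKSUM_PARTIAL case only the pseudo-header part is filled in
 * here; the device (or software fallback) finishes the fold.
 */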
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP. So we build the reply only based on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /* XXX */
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}
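/*
 * The two branches above follow RFC 793's reset generation rules:
 * if the offending segment carried an ACK,
 *	SEQ = SEG.ACK			(no ACK flag on the RST)
 * otherwise
 *	SEQ = 0, ACK = SEG.SEQ + SEG.LEN (SYN and FIN each count as one
 *					  octet), with the ACK flag set,
 * so the RST is verifiably acceptable to the receiver either way.
 */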
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_key tw_key;
#endif

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * The SKB holds an incoming packet, but may not have a valid ->sk
	 * pointer. This is especially the case when we're dealing with a
	 * TIME_WAIT ack, because the sk structure is long gone, and only
	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
	 * structure, and we use it in preference.  I believe that (twsk ||
	 * skb->sk) holds true, but we program defensively.
	 */
	if (!twsk && skb->sk) {
		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
	} else if (twsk && twsk->tw_md5_keylen) {
		tw_key.key = twsk->tw_md5_key;
		tw_key.keylen = twsk->tw_md5_keylen;
		key = &tw_key;
	} else
		key = NULL;

	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}
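/*
 * The sequence arithmetic above, spelled out: for a request still in
 * SYN_RECV our SYN-ACK consumed snt_isn, so the next byte we may send
 * is snt_isn + 1; likewise the peer's SYN consumed rcv_isn, so we
 * acknowledge rcv_isn + 1.
 */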
/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

out:
	dst_release(dst);
	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}
#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(skb->h.th->dest));
	}
}
#endif

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return (struct tcp_md5sig_key *)
						&tp->md5sig_info->keys4[i];
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
					       struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}
/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp4_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
			keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
					(tp->md5sig_info->entries4 + 1)),
				       GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries4)
				memcpy(keys, tp->md5sig_info->keys4,
				       (sizeof(struct tcp4_md5sig_key) *
					tp->md5sig_info->entries4));

			/* Free old key list, and reference new one */
			if (tp->md5sig_info->keys4)
				kfree(tp->md5sig_info->keys4);
			tp->md5sig_info->keys4 = keys;
			tp->md5sig_info->alloced4++;
		}
		tp->md5sig_info->entries4++;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr = addr;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key = newkey;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);
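/*
 * Ownership convention worth spelling out: newkey must be allocated by
 * the caller and is owned by the key list from here on (it is freed by
 * tcp_v4_md5_do_del()/tcp_v4_clear_md5_list()).  The usual call pattern,
 * as in tcp_v4_syn_recv_sock() below (sketch only):
 *
 *	u8 *dup = kmalloc(key->keylen, GFP_ATOMIC);
 *	if (dup) {
 *		memcpy(dup, key->key, key->keylen);
 *		tcp_v4_md5_do_add(sk, addr, dup, key->keylen);
 *	}
 */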
static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Slide the remaining entries down over
				 * the removed one. */
				memcpy(&tp->md5sig_info->keys4[i],
				       &tp->md5sig_info->keys4[i + 1],
				       (tp->md5sig_info->entries4 - i) *
					sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);
static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return -ENOMEM;

		tp->md5sig_info = p;
	}

	newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}
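/*
 * The userspace side of this parser, for reference (a minimal sketch;
 * error handling omitted, peer_addr is whatever address the key should
 * be bound to):
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = peer_addr;
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing tcpm_keylen == 0 deletes the key, matching the do_del path
 * above.
 */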
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   int tcplen)
{
	struct scatterlist sg[4];
	__u16 data_len;
	int block = 0;
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	int i;
#endif
	__sum16 old_checksum;
	struct tcp_md5sig_pool *hp;
	struct tcp4_pseudohdr *bp;
	struct hash_desc *desc;
	int err;
	unsigned int nbytes = 0;

	/*
	 * Okay, so RFC2385 is turned on for this connection,
	 * so we need to generate the MD5 hash for the packet now.
	 */

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;

	bp = &hp->md5_blk.ip4;
	desc = &hp->md5_desc;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = protocol;
	bp->len = htons(tcplen);
	sg_set_buf(&sg[block++], bp, sizeof(*bp));
	nbytes += sizeof(*bp);

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk("Calculating hash for: ");
	for (i = 0; i < sizeof(*bp); i++)
		printk("%02x ", (unsigned int)((unsigned char *)bp)[i]);
	printk(" ");
#endif

	/* 2. the TCP header, excluding options, and assuming a
	 * checksum of zero.
	 */
	old_checksum = th->check;
	th->check = 0;
	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
	nbytes += sizeof(struct tcphdr);
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	for (i = 0; i < sizeof(struct tcphdr); i++)
		printk(" %02x", (unsigned int)((unsigned char *)th)[i]);
#endif
	/* 3. the TCP segment data (if any) */
	data_len = tcplen - (th->doff << 2);
	if (data_len > 0) {
		unsigned char *data = (unsigned char *)th + (th->doff << 2);
		sg_set_buf(&sg[block++], data, data_len);
		nbytes += data_len;
	}

	/* 4. an independently-specified key or password, known to both
	 * TCPs and presumably connection-specific
	 */
	sg_set_buf(&sg[block++], key->key, key->keylen);
	nbytes += key->keylen;

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" and password: ");
	for (i = 0; i < key->keylen; i++)
		printk("%02x ", (unsigned int)key->key[i]);
#endif

	/* Now store the hash into the packet */
	err = crypto_hash_init(desc);
	if (err)
		goto clear_hash;
	err = crypto_hash_update(desc, sg, nbytes);
	if (err)
		goto clear_hash;
	err = crypto_hash_final(desc, md5_hash);
	if (err)
		goto clear_hash;

	/* Reset the header, and free up the crypto */
	tcp_put_md5sig_pool();
	th->check = old_checksum;

out:
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" result:");
	for (i = 0; i < 16; i++)
		printk(" %02x", (unsigned int)(((u8 *)md5_hash)[i]));
	printk("\n");
#endif
	return 0;
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	goto out;
}
int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
			 struct sock *sk,
			 struct dst_entry *dst,
			 struct request_sock *req,
			 struct tcphdr *th, int protocol,
			 int tcplen)
{
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else {
		struct rtable *rt = (struct rtable *)dst;
		BUG_ON(!rt);
		saddr = rt->rt_src;
		daddr = rt->rt_dst;
	}
	return tcp_v4_do_calc_md5_hash(md5_hash, key,
				       saddr, daddr,
				       th, protocol, tcplen);
}

EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *th = skb->h.th;
	int length = (th->doff << 2) - sizeof(struct tcphdr);
	int genhash;
	unsigned char *ptr;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);

	/*
	 * If the TCP option length is less than the TCP_MD5SIG
	 * option length, then we can shortcut
	 */
	if (length < TCPOLEN_MD5SIG) {
		if (hash_expected)
			return 1;
		else
			return 0;
	}

	/* Okay, we can't shortcut - we have to grub through the options */
	ptr = (unsigned char *)(th + 1);
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			goto done_opts;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)
				goto done_opts;
			if (opsize > length)
				goto done_opts;

			if (opcode == TCPOPT_MD5SIG) {
				hash_location = ptr;
				goto done_opts;
			}
		}
		ptr += opsize - 2;
		length -= opsize;
	}
done_opts:
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	if (!hash_expected && hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_do_calc_md5_hash(newhash,
					  hash_expected,
					  iph->saddr, iph->daddr,
					  th, sk->sk_protocol,
					  skb->len);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
#ifdef CONFIG_TCP_MD5SIG_DEBUG
			do {
				int i;
				printk("Received: ");
				for (i = 0; i < 16; i++)
					printk("%02x ",
					       0xff & (int)hash_location[i]);
				printk("\n");
				printk("Calculated: ");
				for (i = 0; i < 16; i++)
					printk("%02x ", 0xff & (int)newhash[i]);
				printk("\n");
			} while (0);
#endif
		}
		return 1;
	}
	return 0;
}

#endif
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};

struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
#endif
};

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = skb->nh.iph->saddr;
	__be32 daddr = skb->nh.iph->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer SYNs sent to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and the peer is
	 * evidently a real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging the syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie) {
		tcp_clear_options(&tmp_opt);
		tmp_opt.saw_tstamp = 0;
	}

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on a web server,
		 * which contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is an easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations
			 * proven to be alive.
			 * It means that we continue to communicate
			 * with destinations already remembered
			 * at the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from %u.%u.%u.%u/%u\n",
				       NIPQUAD(saddr),
				       ntohs(skb->h.th->source));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = skb->nh.iph->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id	      = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
		if (newkey != NULL) {
			memcpy(newkey, key->key, key->keylen);
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
		}
	}
#endif

	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
				      th->source, skb->nh.iph->daddr,
				      th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
				  skb->nh.iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	/* For small segments it is cheaper to verify the checksum
	 * immediately than to defer it. */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, th->dest,
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
/* VJ's idea. Save the last timestamp seen from this destination
 * and hold it at least for the normal timewait interval, to use for
 * duplicate segment detection in subsequent connections, before they
 * enter the synchronized state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}
struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_calc_md5_hash,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
#endif
};
/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	sk_stream_writequeue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(&tcp_hashinfo, sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state* st = seq->private;

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk	  = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state* st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;

		/* We can reschedule _before_ having picked the target: */
		cond_resched_softirq();

		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family)
				continue;
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
			if (tw->tw_family != st->family)
				continue;
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state* st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state* st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state* st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state* st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct seq_file *seq;
	struct tcp_iter_state *s;
	int rc;

	if (unlikely(afinfo == NULL))
		return -EINVAL;

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	s->family		= afinfo->family;
	s->seq_ops.start	= tcp_seq_start;
	s->seq_ops.next		= tcp_seq_next;
	s->seq_ops.show		= afinfo->seq_show;
	s->seq_ops.stop		= tcp_seq_stop;

	rc = seq_open(file, &s->seq_ops);
	if (rc)
		goto out_kfree;
	seq	     = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}
int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	if (!afinfo)
		return -EINVAL;
	afinfo->seq_fops->owner		= afinfo->owner;
	afinfo->seq_fops->open		= tcp_seq_open;
	afinfo->seq_fops->read		= seq_read;
	afinfo->seq_fops->llseek	= seq_lseek;
	afinfo->seq_fops->release	= seq_release_private;

	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
	if (p)
		p->data = afinfo;
	else
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
{
	if (!afinfo)
		return;
	proc_net_remove(afinfo->name);
	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 char *tmpbuf, int i, int uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}
static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una,
		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sp),
		icsk->icsk_probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		icsk->icsk_rto,
		icsk->icsk_ack.ato,
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}
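/*
 * An illustrative /proc/net/tcp line produced by the format above
 * (field values invented for the example): a listener on 127.0.0.1:22
 * shows up roughly as
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0      0 1234 ...
 *
 * Addresses are hex in stored byte order (0100007F == 127.0.0.1),
 * ports are hex (0016 == 22), and state 0A is TCP_LISTEN.
 */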
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       char *tmpbuf, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state* st;
	char tmpbuf[TMPSZ + 1];

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, tmpbuf, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, tmpbuf, st->num);
		break;
	}
	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
out:
	return 0;
}
static struct file_operations tcp4_seq_fops;
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};

int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}

void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

void __init tcp_v4_init(struct net_proto_family *ops)
{
	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
				     IPPROTO_TCP) < 0)
		panic("Failed to create the TCP control socket.\n");
}
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);