err.no Git - linux-2.6/blob - net/ipv4/tcp_ipv4.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
   9  *
  10  *              IPv4 specific functions
  11  *
  12  *
  13  *              code split from:
  14  *              linux/ipv4/tcp.c
  15  *              linux/ipv4/tcp_input.c
  16  *              linux/ipv4/tcp_output.c
  17  *
  18  *              See tcp.c for author information
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  */
  25
  26 /*
  27  * Changes:
  28  *              David S. Miller :       New socket lookup architecture.
  29  *                                      This code is dedicated to John Dyson.
  30  *              David S. Miller :       Change semantics of established hash,
  31  *                                      half is devoted to TIME_WAIT sockets
  32  *                                      and the rest go in the other half.
  33  *              Andi Kleen :            Add support for syncookies and fixed
  34  *                                      some bugs: ip options weren't passed to
  35  *                                      the TCP layer, missed a check for an
  36  *                                      ACK bit.
  37  *              Andi Kleen :            Implemented fast path mtu discovery.
  38  *                                      Fixed many serious bugs in the
  39  *                                      request_sock handling and moved
  40  *                                      most of it into the af independent code.
  41  *                                      Added tail drop and some other bugfixes.
  42  *                                      Added new listen semantics.
  43  *              Mike McLagan    :       Routing by source
  44  *      Juan Jose Ciarlante:            ip_dynaddr bits
  45  *              Andi Kleen:             various fixes.
  46  *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  47  *                                      coma.
  48  *      Andi Kleen              :       Fix new listen.
  49  *      Andi Kleen              :       Fix accept error reporting.
  50  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  51  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  52  *                                      a single port at the same time.
  53  */
  54
  55
  56 #include <linux/types.h>
  57 #include <linux/fcntl.h>
  58 #include <linux/module.h>
  59 #include <linux/random.h>
  60 #include <linux/cache.h>
  61 #include <linux/jhash.h>
  62 #include <linux/init.h>
  63 #include <linux/times.h>
  64
  65 #include <net/icmp.h>
  66 #include <net/inet_hashtables.h>
  67 #include <net/tcp.h>
  68 #include <net/transp_v6.h>
  69 #include <net/ipv6.h>
  70 #include <net/inet_common.h>
  71 #include <net/timewait_sock.h>
  72 #include <net/xfrm.h>
  73 #include <net/netdma.h>
  74
  75 #include <linux/inet.h>
  76 #include <linux/ipv6.h>
  77 #include <linux/stddef.h>
  78 #include <linux/proc_fs.h>
  79 #include <linux/seq_file.h>
  80
  81 #include <linux/crypto.h>
  82 #include <linux/scatterlist.h>
  83
  84 int sysctl_tcp_tw_reuse __read_mostly;
  85 int sysctl_tcp_low_latency __read_mostly;
  86
  87 /* Check TCP sequence numbers in ICMP packets. */
  88 #define ICMP_MIN_LENGTH 8
  89
  90 /* Socket used for sending RSTs */
  91 static struct socket *tcp_socket;
  92
  93 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
  94
  95 #ifdef CONFIG_TCP_MD5SIG
  96 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr);
  97 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
  98                                    __be32 saddr, __be32 daddr, struct tcphdr *th,
  99                                    int protocol, int tcplen);
 100 #endif
 101
 102 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 103         .lhash_lock     = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
 104         .lhash_users    = ATOMIC_INIT(0),
 105         .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
 106 };
 107
 108 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 109 {
 110         return inet_csk_get_port(&tcp_hashinfo, sk, snum,
 111                                  inet_csk_bind_conflict);
 112 }
 113
 114 static void tcp_v4_hash(struct sock *sk)
 115 {
 116         inet_hash(&tcp_hashinfo, sk);
 117 }
 118
 119 void tcp_unhash(struct sock *sk)
 120 {
 121         inet_unhash(&tcp_hashinfo, sk);
 122 }
 123
 124 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 125 {
 126         return secure_tcp_sequence_number(skb->nh.iph->daddr,
 127                                           skb->nh.iph->saddr,
 128                                           skb->h.th->dest,
 129                                           skb->h.th->source);
 130 }
 131
 132 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 133 {
 134         const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 135         struct tcp_sock *tp = tcp_sk(sk);
 136
 137         /* With PAWS, it is safe from the viewpoint
 138            of data integrity. Even without PAWS it is safe provided sequence
 139            spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 140
 141            Actually, the idea is close to VJ's one, only timestamp cache is
 142            held not per host, but per port pair and TW bucket is used as state
 143            holder.
 144
 145            If TW bucket has been already destroyed we fall back to VJ's scheme
 146            and use initial timestamp retrieved from peer table.
 147          */
 148         if (tcptw->tw_ts_recent_stamp &&
 149             (twp == NULL || (sysctl_tcp_tw_reuse &&
 150                              xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
 151                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 152                 if (tp->write_seq == 0)
 153                         tp->write_seq = 1;
 154                 tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 155                 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 156                 sock_hold(sktw);
 157                 return 1;
 158         }
 159
 160         return 0;
 161 }
 162
 163 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 164
 165 /* This will initiate an outgoing connection. */
 166 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 167 {
 168         struct inet_sock *inet = inet_sk(sk);
 169         struct tcp_sock *tp = tcp_sk(sk);
 170         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 171         struct rtable *rt;
 172         __be32 daddr, nexthop;
 173         int tmp;
 174         int err;
 175
 176         if (addr_len < sizeof(struct sockaddr_in))
 177                 return -EINVAL;
 178
 179         if (usin->sin_family != AF_INET)
 180                 return -EAFNOSUPPORT;
 181
 182         nexthop = daddr = usin->sin_addr.s_addr;
 183         if (inet->opt && inet->opt->srr) {
 184                 if (!daddr)
 185                         return -EINVAL;
 186                 nexthop = inet->opt->faddr;
 187         }
 188
 189         tmp = ip_route_connect(&rt, nexthop, inet->saddr,
 190                                RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 191                                IPPROTO_TCP,
 192                                inet->sport, usin->sin_port, sk);
 193         if (tmp < 0)
 194                 return tmp;
 195
 196         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 197                 ip_rt_put(rt);
 198                 return -ENETUNREACH;
 199         }
 200
 201         if (!inet->opt || !inet->opt->srr)
 202                 daddr = rt->rt_dst;
 203
 204         if (!inet->saddr)
 205                 inet->saddr = rt->rt_src;
 206         inet->rcv_saddr = inet->saddr;
 207
 208         if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
 209                 /* Reset inherited state */
 210                 tp->rx_opt.ts_recent       = 0;
 211                 tp->rx_opt.ts_recent_stamp = 0;
 212                 tp->write_seq              = 0;
 213         }
 214
 215         if (tcp_death_row.sysctl_tw_recycle &&
 216             !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 217                 struct inet_peer *peer = rt_get_peer(rt);
 218
 219                 /* VJ's idea. We save last timestamp seen from
 220                  * the destination in peer table, when entering state TIME-WAIT
 221                  * and initialize rx_opt.ts_recent from it, when trying new connection.
 222                  */
 223
 224                 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
 225                         tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 226                         tp->rx_opt.ts_recent = peer->tcp_ts;
 227                 }
 228         }
 229
 230         inet->dport = usin->sin_port;
 231         inet->daddr = daddr;
 232
 233         inet_csk(sk)->icsk_ext_hdr_len = 0;
 234         if (inet->opt)
 235                 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 236
 237         tp->rx_opt.mss_clamp = 536;
 238
 239         /* Socket identity is still unknown (sport may be zero).
 240          * However we set state to SYN-SENT and not releasing socket
 241          * lock select source port, enter ourselves into the hash tables and
 242          * complete initialization after this.
 243          */
 244         tcp_set_state(sk, TCP_SYN_SENT);
 245         err = inet_hash_connect(&tcp_death_row, sk);
 246         if (err)
 247                 goto failure;
 248
 249         err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
 250         if (err)
 251                 goto failure;
 252
 253         /* OK, now commit destination to socket.  */
 254         sk->sk_gso_type = SKB_GSO_TCPV4;
 255         sk_setup_caps(sk, &rt->u.dst);
 256
 257         if (!tp->write_seq)
 258                 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
 259                                                            inet->daddr,
 260                                                            inet->sport,
 261                                                            usin->sin_port);
 262
 263         inet->id = tp->write_seq ^ jiffies;
 264
 265         err = tcp_connect(sk);
 266         rt = NULL;
 267         if (err)
 268                 goto failure;
 269
 270         return 0;
 271
 272 failure:
 273         /* This unhashes the socket and releases the local port, if necessary. */
 274         tcp_set_state(sk, TCP_CLOSE);
 275         ip_rt_put(rt);
 276         sk->sk_route_caps = 0;
 277         inet->dport = 0;
 278         return err;
 279 }
 280
 281 /*
 282  * This routine does path mtu discovery as defined in RFC1191.
 283  */
 284 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 285 {
 286         struct dst_entry *dst;
 287         struct inet_sock *inet = inet_sk(sk);
 288
 289         /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 290          * send out by Linux are always <576bytes so they should go through
 291          * unfragmented).
 292          */
 293         if (sk->sk_state == TCP_LISTEN)
 294                 return;
 295
 296         /* We don't check in the destentry if pmtu discovery is forbidden
 297          * on this route. We just assume that no packet_to_big packets
 298          * are send back when pmtu discovery is not active.
 299          * There is a small race when the user changes this flag in the
 300          * route, but I think that's acceptable.
 301          */
 302         if ((dst = __sk_dst_check(sk, 0)) == NULL)
 303                 return;
 304
 305         dst->ops->update_pmtu(dst, mtu);
 306
 307         /* Something is about to be wrong... Remember soft error
 308          * for the case, if this connection will not able to recover.
 309          */
 310         if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 311                 sk->sk_err_soft = EMSGSIZE;
 312
 313         mtu = dst_mtu(dst);
 314
 315         if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 316             inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 317                 tcp_sync_mss(sk, mtu);
 318
 319                 /* Resend the TCP packet because it's
 320                  * clear that the old packet has been
 321                  * dropped. This is the new "fast" path mtu
 322                  * discovery.
 323                  */
 324                 tcp_simple_retransmit(sk);
 325         } /* else let the usual retransmit timer handle it */
 326 }
 327
 328 /*
 329  * This routine is called by the ICMP module when it gets some
 330  * sort of error condition.  If err < 0 then the socket should
 331  * be closed and the error returned to the user.  If err > 0
 332  * it's just the icmp type << 8 | icmp code.  After adjustment
 333  * header points to the first 8 bytes of the tcp header.  We need
 334  * to find the appropriate port.
 335  *
 336  * The locking strategy used here is very "optimistic". When
 337  * someone else accesses the socket the ICMP is just dropped
 338  * and for some paths there is no check at all.
 339  * A more general error queue to queue errors for later handling
 340  * is probably better.
 341  *
 342  */
 343
 344 void tcp_v4_err(struct sk_buff *skb, u32 info)
 345 {
 346         struct iphdr *iph = (struct iphdr *)skb->data;
 347         struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
 348         struct tcp_sock *tp;
 349         struct inet_sock *inet;
 350         int type = skb->h.icmph->type;
 351         int code = skb->h.icmph->code;
 352         struct sock *sk;
 353         __u32 seq;
 354         int err;
 355
 356         if (skb->len < (iph->ihl << 2) + 8) {
 357                 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 358                 return;
 359         }
 360
 361         sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
 362                          th->source, inet_iif(skb));
 363         if (!sk) {
 364                 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 365                 return;
 366         }
 367         if (sk->sk_state == TCP_TIME_WAIT) {
 368                 inet_twsk_put(inet_twsk(sk));
 369                 return;
 370         }
 371
 372         bh_lock_sock(sk);
 373         /* If too many ICMPs get dropped on busy
 374          * servers this needs to be solved differently.
 375          */
 376         if (sock_owned_by_user(sk))
 377                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
 378
 379         if (sk->sk_state == TCP_CLOSE)
 380                 goto out;
 381
 382         tp = tcp_sk(sk);
 383         seq = ntohl(th->seq);
 384         if (sk->sk_state != TCP_LISTEN &&
 385             !between(seq, tp->snd_una, tp->snd_nxt)) {
 386                 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 387                 goto out;
 388         }
 389
 390         switch (type) {
 391         case ICMP_SOURCE_QUENCH:
 392                 /* Just silently ignore these. */
 393                 goto out;
 394         case ICMP_PARAMETERPROB:
 395                 err = EPROTO;
 396                 break;
 397         case ICMP_DEST_UNREACH:
 398                 if (code > NR_ICMP_UNREACH)
 399                         goto out;
 400
 401                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 402                         if (!sock_owned_by_user(sk))
 403                                 do_pmtu_discovery(sk, iph, info);
 404                         goto out;
 405                 }
 406
 407                 err = icmp_err_convert[code].errno;
 408                 break;
 409         case ICMP_TIME_EXCEEDED:
 410                 err = EHOSTUNREACH;
 411                 break;
 412         default:
 413                 goto out;
 414         }
 415
 416         switch (sk->sk_state) {
 417                 struct request_sock *req, **prev;
 418         case TCP_LISTEN:
 419                 if (sock_owned_by_user(sk))
 420                         goto out;
 421
 422                 req = inet_csk_search_req(sk, &prev, th->dest,
 423                                           iph->daddr, iph->saddr);
 424                 if (!req)
 425                         goto out;
 426
 427                 /* ICMPs are not backlogged, hence we cannot get
 428                    an established socket here.
 429                  */
 430                 BUG_TRAP(!req->sk);
 431
 432                 if (seq != tcp_rsk(req)->snt_isn) {
 433                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 434                         goto out;
 435                 }
 436
 437                 /*
 438                  * Still in SYN_RECV, just remove it silently.
 439                  * There is no good way to pass the error to the newly
 440                  * created socket, and POSIX does not want network
 441                  * errors returned from accept().
 442                  */
 443                 inet_csk_reqsk_queue_drop(sk, req, prev);
 444                 goto out;
 445
 446         case TCP_SYN_SENT:
 447         case TCP_SYN_RECV:  /* Cannot happen.
 448                                It can f.e. if SYNs crossed.
 449                              */
 450                 if (!sock_owned_by_user(sk)) {
 451                         sk->sk_err = err;
 452
 453                         sk->sk_error_report(sk);
 454
 455                         tcp_done(sk);
 456                 } else {
 457                         sk->sk_err_soft = err;
 458                 }
 459                 goto out;
 460         }
 461
 462         /* If we've already connected we will keep trying
 463          * until we time out, or the user gives up.
 464          *
 465          * rfc1122 4.2.3.9 allows to consider as hard errors
 466          * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 467          * but it is obsoleted by pmtu discovery).
 468          *
 469          * Note, that in modern internet, where routing is unreliable
 470          * and in each dark corner broken firewalls sit, sending random
 471          * errors ordered by their masters even this two messages finally lose
 472          * their original sense (even Linux sends invalid PORT_UNREACHs)
 473          *
 474          * Now we are in compliance with RFCs.
 475          *                                                      --ANK (980905)
 476          */
 477
 478         inet = inet_sk(sk);
 479         if (!sock_owned_by_user(sk) && inet->recverr) {
 480                 sk->sk_err = err;
 481                 sk->sk_error_report(sk);
 482         } else  { /* Only an error on timeout */
 483                 sk->sk_err_soft = err;
 484         }
 485
 486 out:
 487         bh_unlock_sock(sk);
 488         sock_put(sk);
 489 }
 490
 491 /* This routine computes an IPv4 TCP checksum. */
 492 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 493 {
 494         struct inet_sock *inet = inet_sk(sk);
 495         struct tcphdr *th = skb->h.th;
 496
 497         if (skb->ip_summed == CHECKSUM_PARTIAL) {
 498                 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
 499                 skb->csum = offsetof(struct tcphdr, check);
 500         } else {
 501                 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
 502                                          csum_partial((char *)th,
 503                                                       th->doff << 2,
 504                                                       skb->csum));
 505         }
 506 }
 507
 508 int tcp_v4_gso_send_check(struct sk_buff *skb)
 509 {
 510         struct iphdr *iph;
 511         struct tcphdr *th;
 512
 513         if (!pskb_may_pull(skb, sizeof(*th)))
 514                 return -EINVAL;
 515
 516         iph = skb->nh.iph;
 517         th = skb->h.th;
 518
 519         th->check = 0;
 520         th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
 521         skb->csum = offsetof(struct tcphdr, check);
 522         skb->ip_summed = CHECKSUM_PARTIAL;
 523         return 0;
 524 }
 525
 526 /*
 527  *      This routine will send an RST to the other tcp.
 528  *
 529  *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 530  *                    for reset.
 531  *      Answer: if a packet caused RST, it is not for a socket
 532  *              existing in our system, if it is matched to a socket,
 533  *              it is just duplicate segment or bug in other side's TCP.
 534  *              So that we build reply only basing on parameters
 535  *              arrived with segment.
 536  *      Exception: precedence violation. We do not implement it in any case.
 537  */
 538
 539 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 540 {
 541         struct tcphdr *th = skb->h.th;
 542         struct {
 543                 struct tcphdr th;
 544 #ifdef CONFIG_TCP_MD5SIG
 545                 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 546 #endif
 547         } rep;
 548         struct ip_reply_arg arg;
 549 #ifdef CONFIG_TCP_MD5SIG
 550         struct tcp_md5sig_key *key;
 551 #endif
 552
 553         /* Never send a reset in response to a reset. */
 554         if (th->rst)
 555                 return;
 556
 557         if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
 558                 return;
 559
 560         /* Swap the send and the receive. */
 561         memset(&rep, 0, sizeof(rep));
 562         rep.th.dest   = th->source;
 563         rep.th.source = th->dest;
 564         rep.th.doff   = sizeof(struct tcphdr) / 4;
 565         rep.th.rst    = 1;
 566
 567         if (th->ack) {
 568                 rep.th.seq = th->ack_seq;
 569         } else {
 570                 rep.th.ack = 1;
 571                 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 572                                        skb->len - (th->doff << 2));
 573         }
 574
 575         memset(&arg, 0, sizeof arg);
 576         arg.iov[0].iov_base = (unsigned char *)&rep;
 577         arg.iov[0].iov_len  = sizeof(rep.th);
 578
 579 #ifdef CONFIG_TCP_MD5SIG
 580         key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
 581         if (key) {
 582                 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 583                                    (TCPOPT_NOP << 16) |
 584                                    (TCPOPT_MD5SIG << 8) |
 585                                    TCPOLEN_MD5SIG);
 586                 /* Update length and the length the header thinks exists */
 587                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 588                 rep.th.doff = arg.iov[0].iov_len / 4;
 589
 590                 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
 591                                         key,
 592                                         skb->nh.iph->daddr,
 593                                         skb->nh.iph->saddr,
 594                                         &rep.th, IPPROTO_TCP,
 595                                         arg.iov[0].iov_len);
 596         }
 597 #endif
 598
 599         arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
 600                                       skb->nh.iph->saddr, /*XXX*/
 601                                       sizeof(struct tcphdr), IPPROTO_TCP, 0);
 602         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 603
 604         ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
 605
 606         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
 607         TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
 608 }
 609
 610 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 611    outside socket context is ugly, certainly. What can I do?
 612  */
 613
 614 static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 615                             struct sk_buff *skb, u32 seq, u32 ack,
 616                             u32 win, u32 ts)
 617 {
 618         struct tcphdr *th = skb->h.th;
 619         struct {
 620                 struct tcphdr th;
 621                 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 622 #ifdef CONFIG_TCP_MD5SIG
 623                            + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 624 #endif
 625                         ];
 626         } rep;
 627         struct ip_reply_arg arg;
 628 #ifdef CONFIG_TCP_MD5SIG
 629         struct tcp_md5sig_key *key;
 630         struct tcp_md5sig_key tw_key;
 631 #endif
 632
 633         memset(&rep.th, 0, sizeof(struct tcphdr));
 634         memset(&arg, 0, sizeof arg);
 635
 636         arg.iov[0].iov_base = (unsigned char *)&rep;
 637         arg.iov[0].iov_len  = sizeof(rep.th);
 638         if (ts) {
 639                 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 640                                    (TCPOPT_TIMESTAMP << 8) |
 641                                    TCPOLEN_TIMESTAMP);
 642                 rep.opt[1] = htonl(tcp_time_stamp);
 643                 rep.opt[2] = htonl(ts);
 644                 arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
 645         }
 646
 647         /* Swap the send and the receive. */
 648         rep.th.dest    = th->source;
 649         rep.th.source  = th->dest;
 650         rep.th.doff    = arg.iov[0].iov_len / 4;
 651         rep.th.seq     = htonl(seq);
 652         rep.th.ack_seq = htonl(ack);
 653         rep.th.ack     = 1;
 654         rep.th.window  = htons(win);
 655
 656 #ifdef CONFIG_TCP_MD5SIG
 657         /*
 658          * The SKB holds an imcoming packet, but may not have a valid ->sk
 659          * pointer. This is especially the case when we're dealing with a
 660          * TIME_WAIT ack, because the sk structure is long gone, and only
 661          * the tcp_timewait_sock remains. So the md5 key is stashed in that
 662          * structure, and we use it in preference.  I believe that (twsk ||
 663          * skb->sk) holds true, but we program defensively.
 664          */
 665         if (!twsk && skb->sk) {
 666                 key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
 667         } else if (twsk && twsk->tw_md5_keylen) {
 668                 tw_key.key = twsk->tw_md5_key;
 669                 tw_key.keylen = twsk->tw_md5_keylen;
 670                 key = &tw_key;
 671         } else {
 672                 key = NULL;
 673         }
 674
 675         if (key) {
 676                 int offset = (ts) ? 3 : 0;
 677
 678                 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 679                                           (TCPOPT_NOP << 16) |
 680                                           (TCPOPT_MD5SIG << 8) |
 681                                           TCPOLEN_MD5SIG);
 682                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 683                 rep.th.doff = arg.iov[0].iov_len/4;
 684
 685                 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
 686                                         key,
 687                                         skb->nh.iph->daddr,
 688                                         skb->nh.iph->saddr,
 689                                         &rep.th, IPPROTO_TCP,
 690                                         arg.iov[0].iov_len);
 691         }
 692 #endif
 693
 694         arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
 695                                       skb->nh.iph->saddr, /*XXX*/
 696                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
 697         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 698
 699         ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
 700
 701         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
 702 }
 703
 704 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 705 {
 706         struct inet_timewait_sock *tw = inet_twsk(sk);
 707         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 708
 709         tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 710                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
 711
 712         inet_twsk_put(tw);
 713 }
 714
 715 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 716 {
 717         tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
 718                         tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 719                         req->ts_recent);
 720 }
 721
 722 /*
 723  *      Send a SYN-ACK after having received an ACK.
 724  *      This still operates on a request_sock only, not on a big
 725  *      socket.
 726  */
 727 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 728                               struct dst_entry *dst)
 729 {
 730         const struct inet_request_sock *ireq = inet_rsk(req);
 731         int err = -1;
 732         struct sk_buff * skb;
 733
 734         /* First, grab a route. */
 735         if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 736                 goto out;
 737
 738         skb = tcp_make_synack(sk, dst, req);
 739
 740         if (skb) {
 741                 struct tcphdr *th = skb->h.th;
 742
 743                 th->check = tcp_v4_check(th, skb->len,
 744                                          ireq->loc_addr,
 745                                          ireq->rmt_addr,
 746                                          csum_partial((char *)th, skb->len,
 747                                                       skb->csum));
 748
 749                 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 750                                             ireq->rmt_addr,
 751                                             ireq->opt);
 752                 err = net_xmit_eval(err);
 753         }
 754
 755 out:
 756         dst_release(dst);
 757         return err;
 758 }
 759
 760 /*
 761  *      IPv4 request_sock destructor.
 762  */
 763 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 764 {
 765         kfree(inet_rsk(req)->opt);
 766 }
 767
 768 #ifdef CONFIG_SYN_COOKIES
 769 static void syn_flood_warning(struct sk_buff *skb)
 770 {
 771         static unsigned long warntime;
 772
 773         if (time_after(jiffies, (warntime + HZ * 60))) {
 774                 warntime = jiffies;
 775                 printk(KERN_INFO
 776                        "possible SYN flooding on port %d. Sending cookies.\n",
 777                        ntohs(skb->h.th->dest));
 778         }
 779 }
 780 #endif
 781
 782 /*
 783  * Save and compile IPv4 options into the request_sock if needed.
 784  */
 785 static struct ip_options *tcp_v4_save_options(struct sock *sk,
 786                                               struct sk_buff *skb)
 787 {
 788         struct ip_options *opt = &(IPCB(skb)->opt);
 789         struct ip_options *dopt = NULL;
 790
 791         if (opt && opt->optlen) {
 792                 int opt_size = optlength(opt);
 793                 dopt = kmalloc(opt_size, GFP_ATOMIC);
 794                 if (dopt) {
 795                         if (ip_options_echo(dopt, skb)) {
 796                                 kfree(dopt);
 797                                 dopt = NULL;
 798                         }
 799                 }
 800         }
 801         return dopt;
 802 }
 803
 804 #ifdef CONFIG_TCP_MD5SIG
 805 /*
 806  * RFC2385 MD5 checksumming requires a mapping of
 807  * IP address->MD5 Key.
 808  * We need to maintain these in the sk structure.
 809  */
 810
 811 /* Find the Key structure for an address.  */
 812 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 813 {
 814         struct tcp_sock *tp = tcp_sk(sk);
 815         int i;
 816
 817         if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 818                 return NULL;
 819         for (i = 0; i < tp->md5sig_info->entries4; i++) {
 820                 if (tp->md5sig_info->keys4[i].addr == addr)
 821                         return (struct tcp_md5sig_key *)&tp->md5sig_info->keys4[i];
 822         }
 823         return NULL;
 824 }
 825
 826 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 827                                          struct sock *addr_sk)
 828 {
 829         return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
 830 }
 831
 832 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 833
 834 struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 835                                                struct request_sock *req)
 836 {
 837         return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 838 }
 839
 840 /* This can be called on a newly created socket, from other files */
 841 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 842                       u8 *newkey, u8 newkeylen)
 843 {
 844         /* Add Key to the list */
 845         struct tcp4_md5sig_key *key;
 846         struct tcp_sock *tp = tcp_sk(sk);
 847         struct tcp4_md5sig_key *keys;
 848
 849         key = (struct tcp4_md5sig_key *) tcp_v4_md5_do_lookup(sk, addr);
 850         if (key) {
 851                 /* Pre-existing entry - just update that one. */
 852                 kfree (key->key);
 853                 key->key = newkey;
 854                 key->keylen = newkeylen;
 855         } else {
 856                 if (!tp->md5sig_info) {
 857                         tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
 858                         if (!tp->md5sig_info) {
 859                                 kfree(newkey);
 860                                 return -ENOMEM;
 861                         }
 862                 }
 863                 if (tcp_alloc_md5sig_pool() == NULL) {
 864                         kfree(newkey);
 865                         return -ENOMEM;
 866                 }
 867                 if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
 868                         keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
 869                                        (tp->md5sig_info->entries4 + 1)), GFP_ATOMIC);
 870                         if (!keys) {
 871                                 kfree(newkey);
 872                                 tcp_free_md5sig_pool();
 873                                 return -ENOMEM;
 874                         }
 875
 876                         if (tp->md5sig_info->entries4)
 877                                 memcpy(keys, tp->md5sig_info->keys4,
 878                                        (sizeof (struct tcp4_md5sig_key) *
 879                                         tp->md5sig_info->entries4));
 880
 881                         /* Free old key list, and reference new one */
 882                         if (tp->md5sig_info->keys4)
 883                                 kfree(tp->md5sig_info->keys4);
 884                         tp->md5sig_info->keys4 = keys;
 885                         tp->md5sig_info->alloced4++;
 886                 }
 887                 tp->md5sig_info->entries4++;
 888                 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr = addr;
 889                 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key = newkey;
 890                 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
 891         }
 892         return 0;
 893 }
 894
 895 EXPORT_SYMBOL(tcp_v4_md5_do_add);
 896
 897 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 898                                u8 *newkey, u8 newkeylen)
 899 {
 900         return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
 901                                  newkey, newkeylen);
 902 }
 903
 904 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 905 {
 906         struct tcp_sock *tp = tcp_sk(sk);
 907         int i;
 908
 909         for (i = 0; i < tp->md5sig_info->entries4; i++) {
 910                 if (tp->md5sig_info->keys4[i].addr == addr) {
 911                         /* Free the key */
 912                         kfree(tp->md5sig_info->keys4[i].key);
 913                         tp->md5sig_info->entries4--;
 914
 915                         if (tp->md5sig_info->entries4 == 0) {
 916                                 kfree(tp->md5sig_info->keys4);
 917                                 tp->md5sig_info->keys4 = NULL;
 918                         } else {
 919                                 /* Need to do some manipulation */
 920                                 if (tp->md5sig_info->entries4 != i)
 921                                         memcpy(&tp->md5sig_info->keys4[i],
 922                                                &tp->md5sig_info->keys4[i+1],
 923                                                (tp->md5sig_info->entries4 - i)
 924                                                 * sizeof (struct tcp4_md5sig_key));
 925                         }
 926                         tcp_free_md5sig_pool();
 927                         return 0;
 928                 }
 929         }
 930         return -ENOENT;
 931 }
 932
 933 EXPORT_SYMBOL(tcp_v4_md5_do_del);
 934
 935 static void tcp_v4_clear_md5_list (struct sock *sk)
 936 {
 937         struct tcp_sock *tp = tcp_sk(sk);
 938
 939         /* Free each key, then the set of key keys,
 940          * the crypto element, and then decrement our
 941          * hold on the last resort crypto.
 942          */
 943         if (tp->md5sig_info->entries4) {
 944                 int i;
 945                 for (i = 0; i < tp->md5sig_info->entries4; i++)
 946                         kfree(tp->md5sig_info->keys4[i].key);
 947                 tp->md5sig_info->entries4 = 0;
 948                 tcp_free_md5sig_pool();
 949         }
 950         if (tp->md5sig_info->keys4) {
 951                 kfree(tp->md5sig_info->keys4);
 952                 tp->md5sig_info->keys4 = NULL;
 953                 tp->md5sig_info->alloced4  = 0;
 954         }
 955 }
 956
 957 static int tcp_v4_parse_md5_keys (struct sock *sk, char __user *optval,
 958                                   int optlen)
 959 {
 960         struct tcp_md5sig cmd;
 961         struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 962         u8 *newkey;
 963
 964         if (optlen < sizeof(cmd))
 965                 return -EINVAL;
 966
 967         if (copy_from_user (&cmd, optval, sizeof(cmd)))
 968                 return -EFAULT;
 969
 970         if (sin->sin_family != AF_INET)
 971                 return -EINVAL;
 972
 973         if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
 974                 if (!tcp_sk(sk)->md5sig_info)
 975                         return -ENOENT;
 976                 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
 977         }
 978
 979         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 980                 return -EINVAL;
 981
 982         if (!tcp_sk(sk)->md5sig_info) {
 983                 struct tcp_sock *tp = tcp_sk(sk);
 984                 struct tcp_md5sig_info *p;
 985
 986                 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
 987                 if (!p)
 988                         return -EINVAL;
 989
 990                 tp->md5sig_info = p;
 991
 992         }
 993
 994         newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
 995         if (!newkey)
 996                 return -ENOMEM;
 997         memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
 998         return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
 999                                  newkey, cmd.tcpm_keylen);
1000 }
1001
1002 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1003                                    __be32 saddr, __be32 daddr,
1004                                    struct tcphdr *th, int protocol,
1005                                    int tcplen)
1006 {
1007         struct scatterlist sg[4];
1008         __u16 data_len;
1009         int block = 0;
1010 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1011         int i;
1012 #endif
1013         __u16 old_checksum;
1014         struct tcp_md5sig_pool *hp;
1015         struct tcp4_pseudohdr *bp;
1016         struct hash_desc *desc;
1017         int err;
1018         unsigned int nbytes = 0;
1019
1020         /*
1021          * Okay, so RFC2385 is turned on for this connection,
1022          * so we need to generate the MD5 hash for the packet now.
1023          */
1024
1025         hp = tcp_get_md5sig_pool();
1026         if (!hp)
1027                 goto clear_hash_noput;
1028
1029         bp = &hp->md5_blk.ip4;
1030         desc = &hp->md5_desc;
1031
1032         /*
1033          * 1. the TCP pseudo-header (in the order: source IP address,
1034          * destination IP address, zero-padded protocol number, and
1035          * segment length)
1036          */
1037         bp->saddr = saddr;
1038         bp->daddr = daddr;
1039         bp->pad = 0;
1040         bp->protocol = protocol;
1041         bp->len = htons(tcplen);
1042         sg_set_buf(&sg[block++], bp, sizeof(*bp));
1043         nbytes += sizeof(*bp);
1044
1045 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1046         printk("Calcuating hash for: ");
1047         for (i = 0; i < sizeof (*bp); i++)
1048                 printk ("%02x ", (unsigned int)((unsigned char *)bp)[i]);
1049         printk(" ");
1050 #endif
1051
1052         /* 2. the TCP header, excluding options, and assuming a
1053          * checksum of zero/
1054          */
1055         old_checksum = th->check;
1056         th->check = 0;
1057         sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1058         nbytes += sizeof(struct tcphdr);
1059 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1060         for (i = 0; i < sizeof (struct tcphdr); i++)
1061                 printk (" %02x", (unsigned int)((unsigned char *)th)[i]);
1062 #endif
1063         /* 3. the TCP segment data (if any) */
1064         data_len = tcplen - (th->doff << 2);
1065         if (data_len > 0) {
1066                 unsigned char *data = (unsigned char *)th + (th->doff << 2);
1067                 sg_set_buf(&sg[block++], data, data_len);
1068                 nbytes += data_len;
1069         }
1070
1071         /* 4. an independently-specified key or password, known to both
1072          * TCPs and presumably connection-specific
1073          */
1074         sg_set_buf(&sg[block++], key->key, key->keylen);
1075         nbytes += key->keylen;
1076
1077 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1078         printk ("  and password: ");
1079         for (i = 0; i < key->keylen; i++)
1080                 printk ("%02x ", (unsigned int)key->key[i]);
1081 #endif
1082
1083         /* Now store the Hash into the packet */
1084         err = crypto_hash_init(desc);
1085         if (err)
1086                 goto clear_hash;
1087         err = crypto_hash_update(desc, sg, nbytes);
1088         if (err)
1089                 goto clear_hash;
1090         err = crypto_hash_final(desc, md5_hash);
1091         if (err)
1092                 goto clear_hash;
1093
1094         /* Reset header, and free up the crypto */
1095         tcp_put_md5sig_pool();
1096         th->check = old_checksum;
1097
1098 out:
1099 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1100         printk(" result:");
1101         for (i = 0; i < 16; i++)
1102                 printk (" %02x", (unsigned int)(((u8*)md5_hash)[i]));
1103         printk("\n");
1104 #endif
1105         return 0;
1106 clear_hash:
1107         tcp_put_md5sig_pool();
1108 clear_hash_noput:
1109         memset(md5_hash, 0, 16);
1110         goto out;
1111 }
1112
1113 int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1114                          struct sock *sk,
1115                          struct dst_entry *dst,
1116                          struct request_sock *req,
1117                          struct tcphdr *th, int protocol,
1118                          int tcplen)
1119 {
1120         __be32 saddr, daddr;
1121
1122         if (sk) {
1123                 saddr = inet_sk(sk)->saddr;
1124                 daddr = inet_sk(sk)->daddr;
1125         } else {
1126                 struct rtable *rt = (struct rtable *)dst;
1127                 BUG_ON(!rt);
1128                 saddr = rt->rt_src;
1129                 daddr = rt->rt_dst;
1130         }
1131         return tcp_v4_do_calc_md5_hash(md5_hash, key,
1132                                        saddr, daddr,
1133                                        th, protocol, tcplen);
1134 }
1135
1136 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1137
1138 static int tcp_v4_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
1139 {
1140         /*
1141          * This gets called for each TCP segment that arrives
1142          * so we want to be efficient.
1143          * We have 3 drop cases:
1144          * o No MD5 hash and one expected.
1145          * o MD5 hash and we're not expecting one.
1146          * o MD5 hash and its wrong.
1147          */
1148         __u8 *hash_location = NULL;
1149         struct tcp_md5sig_key *hash_expected;
1150         struct iphdr *iph = skb->nh.iph;
1151         struct tcphdr *th = skb->h.th;
1152         int length = (th->doff << 2) - sizeof (struct tcphdr);
1153         int genhash;
1154         unsigned char *ptr;
1155         unsigned char newhash[16];
1156
1157         hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1158
1159         /*
1160          * If the TCP option length is less than the TCP_MD5SIG
1161          * option length, then we can shortcut
1162          */
1163         if (length < TCPOLEN_MD5SIG) {
1164                 if (hash_expected)
1165                         return 1;
1166                 else
1167                         return 0;
1168         }
1169
1170         /* Okay, we can't shortcut - we have to grub through the options */
1171         ptr = (unsigned char *)(th + 1);
1172         while (length > 0) {
1173                 int opcode = *ptr++;
1174                 int opsize;
1175
1176                 switch (opcode) {
1177                 case TCPOPT_EOL:
1178                         goto done_opts;
1179                 case TCPOPT_NOP:
1180                         length--;
1181                         continue;
1182                 default:
1183                         opsize = *ptr++;
1184                         if (opsize < 2)
1185                                 goto done_opts;
1186                         if (opsize > length)
1187                                 goto done_opts;
1188
1189                         if (opcode == TCPOPT_MD5SIG) {
1190                                 hash_location = ptr;
1191                                 goto done_opts;
1192                         }
1193                 }
1194                 ptr += opsize-2;
1195                 length -= opsize;
1196         }
1197 done_opts:
1198         /* We've parsed the options - do we have a hash? */
1199         if (!hash_expected && !hash_location)
1200                 return 0;
1201
1202         if (hash_expected && !hash_location) {
1203                 if (net_ratelimit()) {
1204                         printk(KERN_INFO "MD5 Hash NOT expected but found "
1205                                "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1206                                NIPQUAD (iph->saddr), ntohs(th->source),
1207                                NIPQUAD (iph->daddr), ntohs(th->dest));
1208                 }
1209                 return 1;
1210         }
1211
1212         if (!hash_expected && hash_location) {
1213                 if (net_ratelimit()) {
1214                         printk(KERN_INFO "MD5 Hash NOT expected but found "
1215                                "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1216                                NIPQUAD (iph->saddr), ntohs(th->source),
1217                                NIPQUAD (iph->daddr), ntohs(th->dest));
1218                 }
1219                 return 1;
1220         }
1221
1222         /* Okay, so this is hash_expected and hash_location -
1223          * so we need to calculate the checksum.
1224          */
1225         genhash = tcp_v4_do_calc_md5_hash(newhash,
1226                                           hash_expected,
1227                                           iph->saddr, iph->daddr,
1228                                           th, sk->sk_protocol,
1229                                           skb->len);
1230
1231         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1232                 if (net_ratelimit()) {
1233                         printk(KERN_INFO "MD5 Hash failed for "
1234                                "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1235                                NIPQUAD (iph->saddr), ntohs(th->source),
1236                                NIPQUAD (iph->daddr), ntohs(th->dest),
1237                                genhash ? " tcp_v4_calc_md5_hash failed" : "");
1238 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1239                         do {
1240                                 int i;
1241                                 printk("Received: ");
1242                                 for (i = 0; i < 16; i++)
1243                                         printk("%02x ", 0xff & (int)hash_location[i]);
1244                                 printk("\n");
1245                                 printk("Calculated: ");
1246                                 for (i = 0; i < 16; i++)
1247                                         printk("%02x ", 0xff & (int)newhash[i]);
1248                                 printk("\n");
1249                         } while(0);
1250 #endif
1251                 }
1252                 return 1;
1253         }
1254         return 0;
1255 }
1256
1257 #endif
1258
1259 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1260         .family         =       PF_INET,
1261         .obj_size       =       sizeof(struct tcp_request_sock),
1262         .rtx_syn_ack    =       tcp_v4_send_synack,
1263         .send_ack       =       tcp_v4_reqsk_send_ack,
1264         .destructor     =       tcp_v4_reqsk_destructor,
1265         .send_reset     =       tcp_v4_send_reset,
1266 };
1267
1268 struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1269 #ifdef CONFIG_TCP_MD5SIG
1270         .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1271 #endif
1272 };
1273
1274 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1275         .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1276         .twsk_unique    = tcp_twsk_unique,
1277         .twsk_destructor= tcp_twsk_destructor,
1278 };
1279
1280 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1281 {
1282         struct inet_request_sock *ireq;
1283         struct tcp_options_received tmp_opt;
1284         struct request_sock *req;
1285         __be32 saddr = skb->nh.iph->saddr;
1286         __be32 daddr = skb->nh.iph->daddr;
1287         __u32 isn = TCP_SKB_CB(skb)->when;
1288         struct dst_entry *dst = NULL;
1289 #ifdef CONFIG_SYN_COOKIES
1290         int want_cookie = 0;
1291 #else
1292 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1293 #endif
1294
1295         /* Never answer to SYNs send to broadcast or multicast */
1296         if (((struct rtable *)skb->dst)->rt_flags &
1297             (RTCF_BROADCAST | RTCF_MULTICAST))
1298                 goto drop;
1299
1300         /* TW buckets are converted to open requests without
1301          * limitations, they conserve resources and peer is
1302          * evidently real one.
1303          */
1304         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1305 #ifdef CONFIG_SYN_COOKIES
1306                 if (sysctl_tcp_syncookies) {
1307                         want_cookie = 1;
1308                 } else
1309 #endif
1310                 goto drop;
1311         }
1312
1313         /* Accept backlog is full. If we have already queued enough
1314          * of warm entries in syn queue, drop request. It is better than
1315          * clogging syn queue with openreqs with exponentially increasing
1316          * timeout.
1317          */
1318         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1319                 goto drop;
1320
1321         req = reqsk_alloc(&tcp_request_sock_ops);
1322         if (!req)
1323                 goto drop;
1324
1325 #ifdef CONFIG_TCP_MD5SIG
1326         tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1327 #endif
1328
1329         tcp_clear_options(&tmp_opt);
1330         tmp_opt.mss_clamp = 536;
1331         tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1332
1333         tcp_parse_options(skb, &tmp_opt, 0);
1334
1335         if (want_cookie) {
1336                 tcp_clear_options(&tmp_opt);
1337                 tmp_opt.saw_tstamp = 0;
1338         }
1339
1340         if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1341                 /* Some OSes (unknown ones, but I see them on web server, which
1342                  * contains information interesting only for windows'
1343                  * users) do not send their stamp in SYN. It is easy case.
1344                  * We simply do not advertise TS support.
1345                  */
1346                 tmp_opt.saw_tstamp = 0;
1347                 tmp_opt.tstamp_ok  = 0;
1348         }
1349         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1350
1351         tcp_openreq_init(req, &tmp_opt, skb);
1352
1353         if (security_inet_conn_request(sk, skb, req))
1354                 goto drop_and_free;
1355
1356         ireq = inet_rsk(req);
1357         ireq->loc_addr = daddr;
1358         ireq->rmt_addr = saddr;
1359         ireq->opt = tcp_v4_save_options(sk, skb);
1360         if (!want_cookie)
1361                 TCP_ECN_create_request(req, skb->h.th);
1362
1363         if (want_cookie) {
1364 #ifdef CONFIG_SYN_COOKIES
1365                 syn_flood_warning(skb);
1366 #endif
1367                 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1368         } else if (!isn) {
1369                 struct inet_peer *peer = NULL;
1370
1371                 /* VJ's idea. We save last timestamp seen
1372                  * from the destination in peer table, when entering
1373                  * state TIME-WAIT, and check against it before
1374                  * accepting new connection request.
1375                  *
1376                  * If "isn" is not zero, this request hit alive
1377                  * timewait bucket, so that all the necessary checks
1378                  * are made in the function processing timewait state.
1379                  */
1380                 if (tmp_opt.saw_tstamp &&
1381                     tcp_death_row.sysctl_tw_recycle &&
1382                     (dst = inet_csk_route_req(sk, req)) != NULL &&
1383                     (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1384                     peer->v4daddr == saddr) {
1385                         if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1386                             (s32)(peer->tcp_ts - req->ts_recent) >
1387                                                         TCP_PAWS_WINDOW) {
1388                                 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1389                                 dst_release(dst);
1390                                 goto drop_and_free;
1391                         }
1392                 }
1393                 /* Kill the following clause, if you dislike this way. */
1394                 else if (!sysctl_tcp_syncookies &&
1395                          (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1396                           (sysctl_max_syn_backlog >> 2)) &&
1397                          (!peer || !peer->tcp_ts_stamp) &&
1398                          (!dst || !dst_metric(dst, RTAX_RTT))) {
1399                         /* Without syncookies last quarter of
1400                          * backlog is filled with destinations,
1401                          * proven to be alive.
1402                          * It means that we continue to communicate
1403                          * to destinations, already remembered
1404                          * to the moment of synflood.
1405                          */
1406                         LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1407                                        "request from %u.%u.%u.%u/%u\n",
1408                                        NIPQUAD(saddr),
1409                                        ntohs(skb->h.th->source));
1410                         dst_release(dst);
1411                         goto drop_and_free;
1412                 }
1413
1414                 isn = tcp_v4_init_sequence(skb);
1415         }
1416         tcp_rsk(req)->snt_isn = isn;
1417
1418         if (tcp_v4_send_synack(sk, req, dst))
1419                 goto drop_and_free;
1420
1421         if (want_cookie) {
1422                 reqsk_free(req);
1423         } else {
1424                 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1425         }
1426         return 0;
1427
1428 drop_and_free:
1429         reqsk_free(req);
1430 drop:
1431         return 0;
1432 }
1433
1434
1435 /*
1436  * The three way handshake has completed - we got a valid synack -
1437  * now create the new socket.
1438  */
1439 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1440                                   struct request_sock *req,
1441                                   struct dst_entry *dst)
1442 {
1443         struct inet_request_sock *ireq;
1444         struct inet_sock *newinet;
1445         struct tcp_sock *newtp;
1446         struct sock *newsk;
1447 #ifdef CONFIG_TCP_MD5SIG
1448         struct tcp_md5sig_key *key;
1449 #endif
1450
1451         if (sk_acceptq_is_full(sk))
1452                 goto exit_overflow;
1453
1454         if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1455                 goto exit;
1456
1457         newsk = tcp_create_openreq_child(sk, req, skb);
1458         if (!newsk)
1459                 goto exit;
1460
1461         newsk->sk_gso_type = SKB_GSO_TCPV4;
1462         sk_setup_caps(newsk, dst);
1463
1464         newtp                 = tcp_sk(newsk);
1465         newinet               = inet_sk(newsk);
1466         ireq                  = inet_rsk(req);
1467         newinet->daddr        = ireq->rmt_addr;
1468         newinet->rcv_saddr    = ireq->loc_addr;
1469         newinet->saddr        = ireq->loc_addr;
1470         newinet->opt          = ireq->opt;
1471         ireq->opt             = NULL;
1472         newinet->mc_index     = inet_iif(skb);
1473         newinet->mc_ttl       = skb->nh.iph->ttl;
1474         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1475         if (newinet->opt)
1476                 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1477         newinet->id = newtp->write_seq ^ jiffies;
1478
1479         tcp_mtup_init(newsk);
1480         tcp_sync_mss(newsk, dst_mtu(dst));
1481         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1482         tcp_initialize_rcv_mss(newsk);
1483
1484 #ifdef CONFIG_TCP_MD5SIG
1485         /* Copy over the MD5 key from the original socket */
1486         if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1487                 /*
1488                  * We're using one, so create a matching key
1489                  * on the newsk structure. If we fail to get
1490                  * memory, then we end up not copying the key
1491                  * across. Shucks.
1492                  */
1493                 char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
1494                 if (newkey) {
1495                         memcpy(newkey, key->key, key->keylen);
1496                         tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1497                                           newkey, key->keylen);
1498                 }
1499         }
1500 #endif
1501
1502         __inet_hash(&tcp_hashinfo, newsk, 0);
1503         __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1504
1505         return newsk;
1506
1507 exit_overflow:
1508         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1509 exit:
1510         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1511         dst_release(dst);
1512         return NULL;
1513 }
1514
1515 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1516 {
1517         struct tcphdr *th = skb->h.th;
1518         struct iphdr *iph = skb->nh.iph;
1519         struct sock *nsk;
1520         struct request_sock **prev;
1521         /* Find possible connection requests. */
1522         struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1523                                                        iph->saddr, iph->daddr);
1524         if (req)
1525                 return tcp_check_req(sk, skb, req, prev);
1526
1527         nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
1528                                       th->source, skb->nh.iph->daddr,
1529                                       th->dest, inet_iif(skb));
1530
1531         if (nsk) {
1532                 if (nsk->sk_state != TCP_TIME_WAIT) {
1533                         bh_lock_sock(nsk);
1534                         return nsk;
1535                 }
1536                 inet_twsk_put(inet_twsk(nsk));
1537                 return NULL;
1538         }
1539
1540 #ifdef CONFIG_SYN_COOKIES
1541         if (!th->rst && !th->syn && th->ack)
1542                 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1543 #endif
1544         return sk;
1545 }
1546
1547 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1548 {
1549         if (skb->ip_summed == CHECKSUM_COMPLETE) {
1550                 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1551                                   skb->nh.iph->daddr, skb->csum)) {
1552                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1553                         return 0;
1554                 }
1555         }
1556
1557         skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
1558                                        skb->len, IPPROTO_TCP, 0);
1559
1560         if (skb->len <= 76) {
1561                 return __skb_checksum_complete(skb);
1562         }
1563         return 0;
1564 }
1565
1566
1567 /* The socket must have it's spinlock held when we get
1568  * here.
1569  *
1570  * We have a potential double-lock case here, so even when
1571  * doing backlog processing we use the BH locking scheme.
1572  * This is because we cannot sleep with the original spinlock
1573  * held.
1574  */
1575 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1576 {
1577         struct sock *rsk;
1578 #ifdef CONFIG_TCP_MD5SIG
1579         /*
1580          * We really want to reject the packet as early as possible
1581          * if:
1582          *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1583          *  o There is an MD5 option and we're not expecting one
1584          */
1585         if (tcp_v4_inbound_md5_hash (sk, skb))
1586                 goto discard;
1587 #endif
1588
1589         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1590                 TCP_CHECK_TIMER(sk);
1591                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
1592                         rsk = sk;
1593                         goto reset;
1594                 }
1595                 TCP_CHECK_TIMER(sk);
1596                 return 0;
1597         }
1598
1599         if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1600                 goto csum_err;
1601
1602         if (sk->sk_state == TCP_LISTEN) {
1603                 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1604                 if (!nsk)
1605                         goto discard;
1606
1607                 if (nsk != sk) {
1608                         if (tcp_child_process(sk, nsk, skb)) {
1609                                 rsk = nsk;
1610                                 goto reset;
1611                         }
1612                         return 0;
1613                 }
1614         }
1615
1616         TCP_CHECK_TIMER(sk);
1617         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
1618                 rsk = sk;
1619                 goto reset;
1620         }
1621         TCP_CHECK_TIMER(sk);
1622         return 0;
1623
1624 reset:
1625         tcp_v4_send_reset(rsk, skb);
1626 discard:
1627         kfree_skb(skb);
1628         /* Be careful here. If this function gets more complicated and
1629          * gcc suffers from register pressure on the x86, sk (in %ebx)
1630          * might be destroyed here. This current version compiles correctly,
1631          * but you have been warned.
1632          */
1633         return 0;
1634
1635 csum_err:
1636         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1637         goto discard;
1638 }
1639
1640 /*
1641  *      From tcp_input.c
1642  */
1643
1644 int tcp_v4_rcv(struct sk_buff *skb)
1645 {
1646         struct tcphdr *th;
1647         struct sock *sk;
1648         int ret;
1649
1650         if (skb->pkt_type != PACKET_HOST)
1651                 goto discard_it;
1652
1653         /* Count it even if it's bad */
1654         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1655
1656         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1657                 goto discard_it;
1658
1659         th = skb->h.th;
1660
1661         if (th->doff < sizeof(struct tcphdr) / 4)
1662                 goto bad_packet;
1663         if (!pskb_may_pull(skb, th->doff * 4))
1664                 goto discard_it;
1665
1666         /* An explanation is required here, I think.
1667          * Packet length and doff are validated by header prediction,
1668          * provided case of th->doff==0 is eliminated.
1669          * So, we defer the checks. */
1670         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1671              tcp_v4_checksum_init(skb)))
1672                 goto bad_packet;
1673
1674         th = skb->h.th;
1675         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1676         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1677                                     skb->len - th->doff * 4);
1678         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1679         TCP_SKB_CB(skb)->when    = 0;
1680         TCP_SKB_CB(skb)->flags   = skb->nh.iph->tos;
1681         TCP_SKB_CB(skb)->sacked  = 0;
1682
1683         sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1684                            skb->nh.iph->daddr, th->dest,
1685                            inet_iif(skb));
1686
1687         if (!sk)
1688                 goto no_tcp_socket;
1689
1690 process:
1691         if (sk->sk_state == TCP_TIME_WAIT)
1692                 goto do_time_wait;
1693
1694         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1695                 goto discard_and_relse;
1696         nf_reset(skb);
1697
1698         if (sk_filter(sk, skb))
1699                 goto discard_and_relse;
1700
1701         skb->dev = NULL;
1702
1703         bh_lock_sock_nested(sk);
1704         ret = 0;
1705         if (!sock_owned_by_user(sk)) {
1706 #ifdef CONFIG_NET_DMA
1707                 struct tcp_sock *tp = tcp_sk(sk);
1708                 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1709                         tp->ucopy.dma_chan = get_softnet_dma();
1710                 if (tp->ucopy.dma_chan)
1711                         ret = tcp_v4_do_rcv(sk, skb);
1712                 else
1713 #endif
1714                 {
1715                         if (!tcp_prequeue(sk, skb))
1716                         ret = tcp_v4_do_rcv(sk, skb);
1717                 }
1718         } else
1719                 sk_add_backlog(sk, skb);
1720         bh_unlock_sock(sk);
1721
1722         sock_put(sk);
1723
1724         return ret;
1725
1726 no_tcp_socket:
1727         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1728                 goto discard_it;
1729
1730         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1731 bad_packet:
1732                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1733         } else {
1734                 tcp_v4_send_reset(NULL, skb);
1735         }
1736
1737 discard_it:
1738         /* Discard frame. */
1739         kfree_skb(skb);
1740         return 0;
1741
1742 discard_and_relse:
1743         sock_put(sk);
1744         goto discard_it;
1745
1746 do_time_wait:
1747         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1748                 inet_twsk_put(inet_twsk(sk));
1749                 goto discard_it;
1750         }
1751
1752         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1753                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1754                 inet_twsk_put(inet_twsk(sk));
1755                 goto discard_it;
1756         }
1757         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1758         case TCP_TW_SYN: {
1759                 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1760                                                         skb->nh.iph->daddr,
1761                                                         th->dest,
1762                                                         inet_iif(skb));
1763                 if (sk2) {
1764                         inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1765                         inet_twsk_put(inet_twsk(sk));
1766                         sk = sk2;
1767                         goto process;
1768                 }
1769                 /* Fall through to ACK */
1770         }
1771         case TCP_TW_ACK:
1772                 tcp_v4_timewait_ack(sk, skb);
1773                 break;
1774         case TCP_TW_RST:
1775                 goto no_tcp_socket;
1776         case TCP_TW_SUCCESS:;
1777         }
1778         goto discard_it;
1779 }
1780
1781 /* VJ's idea. Save last timestamp seen from this destination
1782  * and hold it at least for normal timewait interval to use for duplicate
1783  * segment detection in subsequent connections, before they enter synchronized
1784  * state.
1785  */
1786
1787 int tcp_v4_remember_stamp(struct sock *sk)
1788 {
1789         struct inet_sock *inet = inet_sk(sk);
1790         struct tcp_sock *tp = tcp_sk(sk);
1791         struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1792         struct inet_peer *peer = NULL;
1793         int release_it = 0;
1794
1795         if (!rt || rt->rt_dst != inet->daddr) {
1796                 peer = inet_getpeer(inet->daddr, 1);
1797                 release_it = 1;
1798         } else {
1799                 if (!rt->peer)
1800                         rt_bind_peer(rt, 1);
1801                 peer = rt->peer;
1802         }
1803
1804         if (peer) {
1805                 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1806                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1807                      peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1808                         peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1809                         peer->tcp_ts = tp->rx_opt.ts_recent;
1810                 }
1811                 if (release_it)
1812                         inet_putpeer(peer);
1813                 return 1;
1814         }
1815
1816         return 0;
1817 }
1818
1819 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1820 {
1821         struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1822
1823         if (peer) {
1824                 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1825
1826                 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1827                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1828                      peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1829                         peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1830                         peer->tcp_ts       = tcptw->tw_ts_recent;
1831                 }
1832                 inet_putpeer(peer);
1833                 return 1;
1834         }
1835
1836         return 0;
1837 }
1838
1839 struct inet_connection_sock_af_ops ipv4_specific = {
1840         .queue_xmit        = ip_queue_xmit,
1841         .send_check        = tcp_v4_send_check,
1842         .rebuild_header    = inet_sk_rebuild_header,
1843         .conn_request      = tcp_v4_conn_request,
1844         .syn_recv_sock     = tcp_v4_syn_recv_sock,
1845         .remember_stamp    = tcp_v4_remember_stamp,
1846         .net_header_len    = sizeof(struct iphdr),
1847         .setsockopt        = ip_setsockopt,
1848         .getsockopt        = ip_getsockopt,
1849         .addr2sockaddr     = inet_csk_addr2sockaddr,
1850         .sockaddr_len      = sizeof(struct sockaddr_in),
1851 #ifdef CONFIG_COMPAT
1852         .compat_setsockopt = compat_ip_setsockopt,
1853         .compat_getsockopt = compat_ip_getsockopt,
1854 #endif
1855 };
1856
1857 struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1858 #ifdef CONFIG_TCP_MD5SIG
1859         .md5_lookup             = tcp_v4_md5_lookup,
1860         .calc_md5_hash          = tcp_v4_calc_md5_hash,
1861         .md5_add                = tcp_v4_md5_add_func,
1862         .md5_parse              = tcp_v4_parse_md5_keys,
1863 #endif
1864 };
1865
1866 /* NOTE: A lot of things set to zero explicitly by call to
1867  *       sk_alloc() so need not be done here.
1868  */
1869 static int tcp_v4_init_sock(struct sock *sk)
1870 {
1871         struct inet_connection_sock *icsk = inet_csk(sk);
1872         struct tcp_sock *tp = tcp_sk(sk);
1873
1874         skb_queue_head_init(&tp->out_of_order_queue);
1875         tcp_init_xmit_timers(sk);
1876         tcp_prequeue_init(tp);
1877
1878         icsk->icsk_rto = TCP_TIMEOUT_INIT;
1879         tp->mdev = TCP_TIMEOUT_INIT;
1880
1881         /* So many TCP implementations out there (incorrectly) count the
1882          * initial SYN frame in their delayed-ACK and congestion control
1883          * algorithms that we must have the following bandaid to talk
1884          * efficiently to them.  -DaveM
1885          */
1886         tp->snd_cwnd = 2;
1887
1888         /* See draft-stevens-tcpca-spec-01 for discussion of the
1889          * initialization of these values.
1890          */
1891         tp->snd_ssthresh = 0x7fffffff;  /* Infinity */
1892         tp->snd_cwnd_clamp = ~0;
1893         tp->mss_cache = 536;
1894
1895         tp->reordering = sysctl_tcp_reordering;
1896         icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1897
1898         sk->sk_state = TCP_CLOSE;
1899
1900         sk->sk_write_space = sk_stream_write_space;
1901         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1902
1903         icsk->icsk_af_ops = &ipv4_specific;
1904         icsk->icsk_sync_mss = tcp_sync_mss;
1905 #ifdef CONFIG_TCP_MD5SIG
1906         tp->af_specific = &tcp_sock_ipv4_specific;
1907 #endif
1908
1909         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1910         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1911
1912         atomic_inc(&tcp_sockets_allocated);
1913
1914         return 0;
1915 }
1916
1917 int tcp_v4_destroy_sock(struct sock *sk)
1918 {
1919         struct tcp_sock *tp = tcp_sk(sk);
1920
1921         tcp_clear_xmit_timers(sk);
1922
1923         tcp_cleanup_congestion_control(sk);
1924
1925         /* Cleanup up the write buffer. */
1926         sk_stream_writequeue_purge(sk);
1927
1928         /* Cleans up our, hopefully empty, out_of_order_queue. */
1929         __skb_queue_purge(&tp->out_of_order_queue);
1930
1931 #ifdef CONFIG_TCP_MD5SIG
1932         /* Clean up the MD5 key list, if any */
1933         if (tp->md5sig_info) {
1934                 tcp_v4_clear_md5_list(sk);
1935                 kfree(tp->md5sig_info);
1936                 tp->md5sig_info = NULL;
1937         }
1938 #endif
1939
1940 #ifdef CONFIG_NET_DMA
1941         /* Cleans up our sk_async_wait_queue */
1942         __skb_queue_purge(&sk->sk_async_wait_queue);
1943 #endif
1944
1945         /* Clean prequeue, it must be empty really */
1946         __skb_queue_purge(&tp->ucopy.prequeue);
1947
1948         /* Clean up a referenced TCP bind bucket. */
1949         if (inet_csk(sk)->icsk_bind_hash)
1950                 inet_put_port(&tcp_hashinfo, sk);
1951
1952         /*
1953          * If sendmsg cached page exists, toss it.
1954          */
1955         if (sk->sk_sndmsg_page) {
1956                 __free_page(sk->sk_sndmsg_page);
1957                 sk->sk_sndmsg_page = NULL;
1958         }
1959
1960         atomic_dec(&tcp_sockets_allocated);
1961
1962         return 0;
1963 }
1964
1965 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1966
1967 #ifdef CONFIG_PROC_FS
1968 /* Proc filesystem TCP sock list dumping. */
1969
1970 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1971 {
1972         return hlist_empty(head) ? NULL :
1973                 list_entry(head->first, struct inet_timewait_sock, tw_node);
1974 }
1975
1976 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1977 {
1978         return tw->tw_node.next ?
1979                 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1980 }
1981
1982 static void *listening_get_next(struct seq_file *seq, void *cur)
1983 {
1984         struct inet_connection_sock *icsk;
1985         struct hlist_node *node;
1986         struct sock *sk = cur;
1987         struct tcp_iter_state* st = seq->private;
1988
1989         if (!sk) {
1990                 st->bucket = 0;
1991                 sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1992                 goto get_sk;
1993         }
1994
1995         ++st->num;
1996
1997         if (st->state == TCP_SEQ_STATE_OPENREQ) {
1998                 struct request_sock *req = cur;
1999
2000                 icsk = inet_csk(st->syn_wait_sk);
2001                 req = req->dl_next;
2002                 while (1) {
2003                         while (req) {
2004                                 if (req->rsk_ops->family == st->family) {
2005                                         cur = req;
2006                                         goto out;
2007                                 }
2008                                 req = req->dl_next;
2009                         }
2010                         if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2011                                 break;
2012 get_req:
2013                         req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2014                 }
2015                 sk        = sk_next(st->syn_wait_sk);
2016                 st->state = TCP_SEQ_STATE_LISTENING;
2017                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2018         } else {
2019                 icsk = inet_csk(sk);
2020                 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021                 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2022                         goto start_req;
2023                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2024                 sk = sk_next(sk);
2025         }
2026 get_sk:
2027         sk_for_each_from(sk, node) {
2028                 if (sk->sk_family == st->family) {
2029                         cur = sk;
2030                         goto out;
2031                 }
2032                 icsk = inet_csk(sk);
2033                 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2034                 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2035 start_req:
2036                         st->uid         = sock_i_uid(sk);
2037                         st->syn_wait_sk = sk;
2038                         st->state       = TCP_SEQ_STATE_OPENREQ;
2039                         st->sbucket     = 0;
2040                         goto get_req;
2041                 }
2042                 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2043         }
2044         if (++st->bucket < INET_LHTABLE_SIZE) {
2045                 sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2046                 goto get_sk;
2047         }
2048         cur = NULL;
2049 out:
2050         return cur;
2051 }
2052
2053 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2054 {
2055         void *rc = listening_get_next(seq, NULL);
2056
2057         while (rc && *pos) {
2058                 rc = listening_get_next(seq, rc);
2059                 --*pos;
2060         }
2061         return rc;
2062 }
2063
2064 static void *established_get_first(struct seq_file *seq)
2065 {
2066         struct tcp_iter_state* st = seq->private;
2067         void *rc = NULL;
2068
2069         for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2070                 struct sock *sk;
2071                 struct hlist_node *node;
2072                 struct inet_timewait_sock *tw;
2073
2074                 /* We can reschedule _before_ having picked the target: */
2075                 cond_resched_softirq();
2076
2077                 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2078                 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2079                         if (sk->sk_family != st->family) {
2080                                 continue;
2081                         }
2082                         rc = sk;
2083                         goto out;
2084                 }
2085                 st->state = TCP_SEQ_STATE_TIME_WAIT;
2086                 inet_twsk_for_each(tw, node,
2087                                    &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
2088                         if (tw->tw_family != st->family) {
2089                                 continue;
2090                         }
2091                         rc = tw;
2092                         goto out;
2093                 }
2094                 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2095                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2096         }
2097 out:
2098         return rc;
2099 }
2100
2101 static void *established_get_next(struct seq_file *seq, void *cur)
2102 {
2103         struct sock *sk = cur;
2104         struct inet_timewait_sock *tw;
2105         struct hlist_node *node;
2106         struct tcp_iter_state* st = seq->private;
2107
2108         ++st->num;
2109
2110         if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2111                 tw = cur;
2112                 tw = tw_next(tw);
2113 get_tw:
2114                 while (tw && tw->tw_family != st->family) {
2115                         tw = tw_next(tw);
2116                 }
2117                 if (tw) {
2118                         cur = tw;
2119                         goto out;
2120                 }
2121                 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2122                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2123
2124                 /* We can reschedule between buckets: */
2125                 cond_resched_softirq();
2126
2127                 if (++st->bucket < tcp_hashinfo.ehash_size) {
2128                         read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2129                         sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2130                 } else {
2131                         cur = NULL;
2132                         goto out;
2133                 }
2134         } else
2135                 sk = sk_next(sk);
2136
2137         sk_for_each_from(sk, node) {
2138                 if (sk->sk_family == st->family)
2139                         goto found;
2140         }
2141
2142         st->state = TCP_SEQ_STATE_TIME_WAIT;
2143         tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
2144         goto get_tw;
2145 found:
2146         cur = sk;
2147 out:
2148         return cur;
2149 }
2150
2151 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2152 {
2153         void *rc = established_get_first(seq);
2154
2155         while (rc && pos) {
2156                 rc = established_get_next(seq, rc);
2157                 --pos;
2158         }
2159         return rc;
2160 }
2161
2162 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2163 {
2164         void *rc;
2165         struct tcp_iter_state* st = seq->private;
2166
2167         inet_listen_lock(&tcp_hashinfo);
2168         st->state = TCP_SEQ_STATE_LISTENING;
2169         rc        = listening_get_idx(seq, &pos);
2170
2171         if (!rc) {
2172                 inet_listen_unlock(&tcp_hashinfo);
2173                 local_bh_disable();
2174                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2175                 rc        = established_get_idx(seq, pos);
2176         }
2177
2178         return rc;
2179 }
2180
2181 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2182 {
2183         struct tcp_iter_state* st = seq->private;
2184         st->state = TCP_SEQ_STATE_LISTENING;
2185         st->num = 0;
2186         return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2187 }
2188
2189 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2190 {
2191         void *rc = NULL;
2192         struct tcp_iter_state* st;
2193
2194         if (v == SEQ_START_TOKEN) {
2195                 rc = tcp_get_idx(seq, 0);
2196                 goto out;
2197         }
2198         st = seq->private;
2199
2200         switch (st->state) {
2201         case TCP_SEQ_STATE_OPENREQ:
2202         case TCP_SEQ_STATE_LISTENING:
2203                 rc = listening_get_next(seq, v);
2204                 if (!rc) {
2205                         inet_listen_unlock(&tcp_hashinfo);
2206                         local_bh_disable();
2207                         st->state = TCP_SEQ_STATE_ESTABLISHED;
2208                         rc        = established_get_first(seq);
2209                 }
2210                 break;
2211         case TCP_SEQ_STATE_ESTABLISHED:
2212         case TCP_SEQ_STATE_TIME_WAIT:
2213                 rc = established_get_next(seq, v);
2214                 break;
2215         }
2216 out:
2217         ++*pos;
2218         return rc;
2219 }
2220
2221 static void tcp_seq_stop(struct seq_file *seq, void *v)
2222 {
2223         struct tcp_iter_state* st = seq->private;
2224
2225         switch (st->state) {
2226         case TCP_SEQ_STATE_OPENREQ:
2227                 if (v) {
2228                         struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2229                         read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2230                 }
2231         case TCP_SEQ_STATE_LISTENING:
2232                 if (v != SEQ_START_TOKEN)
2233                         inet_listen_unlock(&tcp_hashinfo);
2234                 break;
2235         case TCP_SEQ_STATE_TIME_WAIT:
2236         case TCP_SEQ_STATE_ESTABLISHED:
2237                 if (v)
2238                         read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2239                 local_bh_enable();
2240                 break;
2241         }
2242 }
2243
2244 static int tcp_seq_open(struct inode *inode, struct file *file)
2245 {
2246         struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2247         struct seq_file *seq;
2248         struct tcp_iter_state *s;
2249         int rc;
2250
2251         if (unlikely(afinfo == NULL))
2252                 return -EINVAL;
2253
2254         s = kzalloc(sizeof(*s), GFP_KERNEL);
2255         if (!s)
2256                 return -ENOMEM;
2257         s->family               = afinfo->family;
2258         s->seq_ops.start        = tcp_seq_start;
2259         s->seq_ops.next         = tcp_seq_next;
2260         s->seq_ops.show         = afinfo->seq_show;
2261         s->seq_ops.stop         = tcp_seq_stop;
2262
2263         rc = seq_open(file, &s->seq_ops);
2264         if (rc)
2265                 goto out_kfree;
2266         seq          = file->private_data;
2267         seq->private = s;
2268 out:
2269         return rc;
2270 out_kfree:
2271         kfree(s);
2272         goto out;
2273 }
2274
2275 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2276 {
2277         int rc = 0;
2278         struct proc_dir_entry *p;
2279
2280         if (!afinfo)
2281                 return -EINVAL;
2282         afinfo->seq_fops->owner         = afinfo->owner;
2283         afinfo->seq_fops->open          = tcp_seq_open;
2284         afinfo->seq_fops->read          = seq_read;
2285         afinfo->seq_fops->llseek        = seq_lseek;
2286         afinfo->seq_fops->release       = seq_release_private;
2287
2288         p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2289         if (p)
2290                 p->data = afinfo;
2291         else
2292                 rc = -ENOMEM;
2293         return rc;
2294 }
2295
2296 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2297 {
2298         if (!afinfo)
2299                 return;
2300         proc_net_remove(afinfo->name);
2301         memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2302 }
2303
2304 static void get_openreq4(struct sock *sk, struct request_sock *req,
2305                          char *tmpbuf, int i, int uid)
2306 {
2307         const struct inet_request_sock *ireq = inet_rsk(req);
2308         int ttd = req->expires - jiffies;
2309
2310         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2311                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2312                 i,
2313                 ireq->loc_addr,
2314                 ntohs(inet_sk(sk)->sport),
2315                 ireq->rmt_addr,
2316                 ntohs(ireq->rmt_port),
2317                 TCP_SYN_RECV,
2318                 0, 0, /* could print option size, but that is af dependent. */
2319                 1,    /* timers active (only the expire timer) */
2320                 jiffies_to_clock_t(ttd),
2321                 req->retrans,
2322                 uid,
2323                 0,  /* non standard timer */
2324                 0, /* open_requests have no inode */
2325                 atomic_read(&sk->sk_refcnt),
2326                 req);
2327 }
2328
2329 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2330 {
2331         int timer_active;
2332         unsigned long timer_expires;
2333         struct tcp_sock *tp = tcp_sk(sp);
2334         const struct inet_connection_sock *icsk = inet_csk(sp);
2335         struct inet_sock *inet = inet_sk(sp);
2336         __be32 dest = inet->daddr;
2337         __be32 src = inet->rcv_saddr;
2338         __u16 destp = ntohs(inet->dport);
2339         __u16 srcp = ntohs(inet->sport);
2340
2341         if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2342                 timer_active    = 1;
2343                 timer_expires   = icsk->icsk_timeout;
2344         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2345                 timer_active    = 4;
2346                 timer_expires   = icsk->icsk_timeout;
2347         } else if (timer_pending(&sp->sk_timer)) {
2348                 timer_active    = 2;
2349                 timer_expires   = sp->sk_timer.expires;
2350         } else {
2351                 timer_active    = 0;
2352                 timer_expires = jiffies;
2353         }
2354
2355         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2356                         "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2357                 i, src, srcp, dest, destp, sp->sk_state,
2358                 tp->write_seq - tp->snd_una,
2359                 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
2360                 timer_active,
2361                 jiffies_to_clock_t(timer_expires - jiffies),
2362                 icsk->icsk_retransmits,
2363                 sock_i_uid(sp),
2364                 icsk->icsk_probes_out,
2365                 sock_i_ino(sp),
2366                 atomic_read(&sp->sk_refcnt), sp,
2367                 icsk->icsk_rto,
2368                 icsk->icsk_ack.ato,
2369                 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2370                 tp->snd_cwnd,
2371                 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2372 }
2373
2374 static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
2375 {
2376         __be32 dest, src;
2377         __u16 destp, srcp;
2378         int ttd = tw->tw_ttd - jiffies;
2379
2380         if (ttd < 0)
2381                 ttd = 0;
2382
2383         dest  = tw->tw_daddr;
2384         src   = tw->tw_rcv_saddr;
2385         destp = ntohs(tw->tw_dport);
2386         srcp  = ntohs(tw->tw_sport);
2387
2388         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2389                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2390                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2391                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2392                 atomic_read(&tw->tw_refcnt), tw);
2393 }
2394
2395 #define TMPSZ 150
2396
2397 static int tcp4_seq_show(struct seq_file *seq, void *v)
2398 {
2399         struct tcp_iter_state* st;
2400         char tmpbuf[TMPSZ + 1];
2401
2402         if (v == SEQ_START_TOKEN) {
2403                 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2404                            "  sl  local_address rem_address   st tx_queue "
2405                            "rx_queue tr tm->when retrnsmt   uid  timeout "
2406                            "inode");
2407                 goto out;
2408         }
2409         st = seq->private;
2410
2411         switch (st->state) {
2412         case TCP_SEQ_STATE_LISTENING:
2413         case TCP_SEQ_STATE_ESTABLISHED:
2414                 get_tcp4_sock(v, tmpbuf, st->num);
2415                 break;
2416         case TCP_SEQ_STATE_OPENREQ:
2417                 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2418                 break;
2419         case TCP_SEQ_STATE_TIME_WAIT:
2420                 get_timewait4_sock(v, tmpbuf, st->num);
2421                 break;
2422         }
2423         seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2424 out:
2425         return 0;
2426 }
2427
2428 static struct file_operations tcp4_seq_fops;
2429 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2430         .owner          = THIS_MODULE,
2431         .name           = "tcp",
2432         .family         = AF_INET,
2433         .seq_show       = tcp4_seq_show,
2434         .seq_fops       = &tcp4_seq_fops,
2435 };
2436
2437 int __init tcp4_proc_init(void)
2438 {
2439         return tcp_proc_register(&tcp4_seq_afinfo);
2440 }
2441
2442 void tcp4_proc_exit(void)
2443 {
2444         tcp_proc_unregister(&tcp4_seq_afinfo);
2445 }
2446 #endif /* CONFIG_PROC_FS */
2447
2448 struct proto tcp_prot = {
2449         .name                   = "TCP",
2450         .owner                  = THIS_MODULE,
2451         .close                  = tcp_close,
2452         .connect                = tcp_v4_connect,
2453         .disconnect             = tcp_disconnect,
2454         .accept                 = inet_csk_accept,
2455         .ioctl                  = tcp_ioctl,
2456         .init                   = tcp_v4_init_sock,
2457         .destroy                = tcp_v4_destroy_sock,
2458         .shutdown               = tcp_shutdown,
2459         .setsockopt             = tcp_setsockopt,
2460         .getsockopt             = tcp_getsockopt,
2461         .sendmsg                = tcp_sendmsg,
2462         .recvmsg                = tcp_recvmsg,
2463         .backlog_rcv            = tcp_v4_do_rcv,
2464         .hash                   = tcp_v4_hash,
2465         .unhash                 = tcp_unhash,
2466         .get_port               = tcp_v4_get_port,
2467         .enter_memory_pressure  = tcp_enter_memory_pressure,
2468         .sockets_allocated      = &tcp_sockets_allocated,
2469         .orphan_count           = &tcp_orphan_count,
2470         .memory_allocated       = &tcp_memory_allocated,
2471         .memory_pressure        = &tcp_memory_pressure,
2472         .sysctl_mem             = sysctl_tcp_mem,
2473         .sysctl_wmem            = sysctl_tcp_wmem,
2474         .sysctl_rmem            = sysctl_tcp_rmem,
2475         .max_header             = MAX_TCP_HEADER,
2476         .obj_size               = sizeof(struct tcp_sock),
2477         .twsk_prot              = &tcp_timewait_sock_ops,
2478         .rsk_prot               = &tcp_request_sock_ops,
2479 #ifdef CONFIG_COMPAT
2480         .compat_setsockopt      = compat_tcp_setsockopt,
2481         .compat_getsockopt      = compat_tcp_getsockopt,
2482 #endif
2483 };
2484
2485 void __init tcp_v4_init(struct net_proto_family *ops)
2486 {
2487         if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
2488                 panic("Failed to create the TCP control socket.\n");
2489 }
2490
2491 EXPORT_SYMBOL(ipv4_specific);
2492 EXPORT_SYMBOL(tcp_hashinfo);
2493 EXPORT_SYMBOL(tcp_prot);
2494 EXPORT_SYMBOL(tcp_unhash);
2495 EXPORT_SYMBOL(tcp_v4_conn_request);
2496 EXPORT_SYMBOL(tcp_v4_connect);
2497 EXPORT_SYMBOL(tcp_v4_do_rcv);
2498 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2499 EXPORT_SYMBOL(tcp_v4_send_check);
2500 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2501
2502 #ifdef CONFIG_PROC_FS
2503 EXPORT_SYMBOL(tcp_proc_register);
2504 EXPORT_SYMBOL(tcp_proc_unregister);
2505 #endif
2506 EXPORT_SYMBOL(sysctl_local_port_range);
2507 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2508