/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller	:	New socket lookup architecture.
 *				This code is dedicated to John Dyson.
 *	David S. Miller :	Change semantics of established hash,
 *				half is devoted to TIME_WAIT sockets
 *				and the rest go in the other half.
 *	Andi Kleen :		Add support for syncookies and fixed
 *				some bugs: ip options weren't passed to
 *				the TCP layer, missed a check for an
 *				ACK bit.
 *	Andi Kleen :		Implemented fast path mtu discovery.
 *				Fixed many serious bugs in the
 *				request_sock handling and moved
 *				most of it into the af independent code.
 *				Added tail drop and some other bugfixes.
 *				Added new listen semantics.
 *	Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:	ip_dynaddr bits
 *	Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov :	Transparent proxy revived after year
 *				coma.
 *	Andi Kleen :		Fix new listen.
 *	Andi Kleen :		Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option,
 *	Alexey Kuznetsov		which allows both IPv4 and IPv6
 *					sockets to bind to a single port at
 *					the same time.
 */
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
84 int sysctl_tcp_tw_reuse __read_mostly;
85 int sysctl_tcp_low_latency __read_mostly;
87 /* Check TCP sequence numbers in ICMP packets. */
88 #define ICMP_MIN_LENGTH 8
90 /* Socket used for sending RSTs */
91 static struct socket *tcp_socket;
93 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
95 #ifdef CONFIG_TCP_MD5SIG
96 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr);
97 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
98 __be32 saddr, __be32 daddr, struct tcphdr *th,
99 int protocol, int tcplen);
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
108 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
110 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
111 inet_csk_bind_conflict);
114 static void tcp_v4_hash(struct sock *sk)
116 inet_hash(&tcp_hashinfo, sk);
119 void tcp_unhash(struct sock *sk)
121 inet_unhash(&tcp_hashinfo, sk);
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}
132 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
134 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
135 struct tcp_sock *tp = tcp_sk(sk);
	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host but per port pair, and the TW bucket is used
	   as the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer
	   table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
163 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
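/*
 * Editor's illustration (not from the original source): the
 * "+ 65535 + 2" above starts the reused connection's sequence space
 * safely beyond anything the old incarnation could still have in
 * flight.  If the old connection ended with tw_snd_nxt == 1000, the
 * new one begins at 1000 + 65535 + 2 = 66537, a full maximum window
 * (plus two) past the old sequence space, so a stray old segment
 * cannot fall inside the new connection's window.
 */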
165 /* This will initiate an outgoing connection. */
166 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
168 struct inet_sock *inet = inet_sk(sk);
169 struct tcp_sock *tp = tcp_sk(sk);
170 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
172 __be32 daddr, nexthop;
176 if (addr_len < sizeof(struct sockaddr_in))
179 if (usin->sin_family != AF_INET)
180 return -EAFNOSUPPORT;
182 nexthop = daddr = usin->sin_addr.s_addr;
183 if (inet->opt && inet->opt->srr) {
186 nexthop = inet->opt->faddr;
189 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
190 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
192 inet->sport, usin->sin_port, sk);
196 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
201 if (!inet->opt || !inet->opt->srr)
205 inet->saddr = rt->rt_src;
206 inet->rcv_saddr = inet->saddr;
208 if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
209 /* Reset inherited state */
210 tp->rx_opt.ts_recent = 0;
211 tp->rx_opt.ts_recent_stamp = 0;
215 if (tcp_death_row.sysctl_tw_recycle &&
216 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
217 struct inet_peer *peer = rt_get_peer(rt);
219 /* VJ's idea. We save last timestamp seen from
220 * the destination in peer table, when entering state TIME-WAIT
221 * and initialize rx_opt.ts_recent from it, when trying new connection.
224 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
225 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
226 tp->rx_opt.ts_recent = peer->tcp_ts;
230 inet->dport = usin->sin_port;
233 inet_csk(sk)->icsk_ext_hdr_len = 0;
235 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
237 tp->rx_opt.mss_clamp = 536;
	/* Socket identity is still unknown (sport may be zero).
	 * However, we set the state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the hash
	 * tables, and complete initialization after this.
	 */
244 tcp_set_state(sk, TCP_SYN_SENT);
245 err = inet_hash_connect(&tcp_death_row, sk);
249 err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
253 /* OK, now commit destination to socket. */
254 sk->sk_gso_type = SKB_GSO_TCPV4;
255 sk_setup_caps(sk, &rt->u.dst);
	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);
263 inet->id = tp->write_seq ^ jiffies;
265 err = tcp_connect(sk);
273 /* This unhashes the socket and releases the local port, if necessary. */
274 tcp_set_state(sk, TCP_CLOSE);
276 sk->sk_route_caps = 0;
282 * This routine does path mtu discovery as defined in RFC1191.
284 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
286 struct dst_entry *dst;
287 struct inet_sock *inet = inet_sk(sk);
	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go
	 * through unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destination entry if PMTU discovery is
	 * forbidden on this route.  We just assume that no packet-too-big
	 * packets are sent back when PMTU discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);
	/* Something is about to go wrong... Remember the soft error
	 * in case this connection will not be able to recover.
	 */
310 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
311 sk->sk_err_soft = EMSGSIZE;
315 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
316 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
317 tcp_sync_mss(sk, mtu);
		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path MTU
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
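/*
 * Illustrative sketch (editor's addition; userspace, not kernel code):
 * the inet->pmtudisc test above honours the per-socket IP_MTU_DISCOVER
 * setting, which an application controls roughly like this:
 *
 *	int val = IP_PMTUDISC_DONT;	-- never set DF, ignore PMTU
 *	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));
 *
 * With IP_PMTUDISC_DONT the socket keeps its MSS and relies on
 * fragmentation; with the default IP_PMTUDISC_WANT, tcp_sync_mss()
 * above shrinks the MSS to fit the newly learned path MTU.
 */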
329 * This routine is called by the ICMP module when it gets some
330 * sort of error condition. If err < 0 then the socket should
331 * be closed and the error returned to the user. If err > 0
332 * it's just the icmp type << 8 | icmp code. After adjustment
333 * header points to the first 8 bytes of the tcp header. We need
334 * to find the appropriate port.
336 * The locking strategy used here is very "optimistic". When
337 * someone else accesses the socket the ICMP is just dropped
338 * and for some paths there is no check at all.
339 * A more general error queue to queue errors for later handling
340 * is probably better.
344 void tcp_v4_err(struct sk_buff *skb, u32 info)
346 struct iphdr *iph = (struct iphdr *)skb->data;
347 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
349 struct inet_sock *inet;
350 int type = skb->h.icmph->type;
351 int code = skb->h.icmph->code;
356 if (skb->len < (iph->ihl << 2) + 8) {
357 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
361 sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
362 th->source, inet_iif(skb));
364 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
367 if (sk->sk_state == TCP_TIME_WAIT) {
368 inet_twsk_put(inet_twsk(sk));
373 /* If too many ICMPs get dropped on busy
374 * servers this needs to be solved differently.
376 if (sock_owned_by_user(sk))
377 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
379 if (sk->sk_state == TCP_CLOSE)
383 seq = ntohl(th->seq);
384 if (sk->sk_state != TCP_LISTEN &&
385 !between(seq, tp->snd_una, tp->snd_nxt)) {
386 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
391 case ICMP_SOURCE_QUENCH:
392 /* Just silently ignore these. */
394 case ICMP_PARAMETERPROB:
397 case ICMP_DEST_UNREACH:
398 if (code > NR_ICMP_UNREACH)
401 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
402 if (!sock_owned_by_user(sk))
403 do_pmtu_discovery(sk, iph, info);
407 err = icmp_err_convert[code].errno;
409 case ICMP_TIME_EXCEEDED:
416 switch (sk->sk_state) {
417 struct request_sock *req, **prev;
419 if (sock_owned_by_user(sk))
422 req = inet_csk_search_req(sk, &prev, th->dest,
423 iph->daddr, iph->saddr);
427 /* ICMPs are not backlogged, hence we cannot get
428 an established socket here.
432 if (seq != tcp_rsk(req)->snt_isn) {
433 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
438 * Still in SYN_RECV, just remove it silently.
439 * There is no good way to pass the error to the newly
440 * created socket, and POSIX does not want network
441 * errors returned from accept().
443 inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed.
			     */
450 if (!sock_owned_by_user(sk)) {
453 sk->sk_error_report(sk);
457 sk->sk_err_soft = err;
	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by PMTU discovery).
	 *
	 * Note that on the modern internet, where routing is unreliable
	 * and broken firewalls sit in every dark corner sending spurious
	 * errors as ordered by their masters, even these two messages have
	 * finally lost their original sense (even Linux sends invalid
	 * PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */
479 if (!sock_owned_by_user(sk) && inet->recverr) {
481 sk->sk_error_report(sk);
482 } else { /* Only an error on timeout */
483 sk->sk_err_soft = err;
491 /* This routine computes an IPv4 TCP checksum. */
492 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
494 struct inet_sock *inet = inet_sk(sk);
495 struct tcphdr *th = skb->h.th;
497 if (skb->ip_summed == CHECKSUM_PARTIAL) {
498 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
499 skb->csum = offsetof(struct tcphdr, check);
501 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
502 csum_partial((char *)th,
508 int tcp_v4_gso_send_check(struct sk_buff *skb)
513 if (!pskb_may_pull(skb, sizeof(*th)))
520 th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
521 skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}
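/*
 * Illustrative sketch (editor's addition, not kernel code): the seed
 * computed by tcp_v4_check()/csum_tcpudp_nofold() is the usual 16-bit
 * one's-complement sum over the pseudo-header, conceptually:
 *
 *	unsigned int sum = 0;
 *	sum += (ntohl(saddr) >> 16) + (ntohl(saddr) & 0xffff);
 *	sum += (ntohl(daddr) >> 16) + (ntohl(daddr) & 0xffff);
 *	sum += IPPROTO_TCP + tcp_len;
 *	while (sum >> 16)
 *		sum = (sum & 0xffff) + (sum >> 16);
 *
 * With CHECKSUM_PARTIAL the device finishes the sum over the TCP
 * header and payload and stores the complement at the offset of the
 * check field recorded in skb->csum above.
 */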
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for the reset?
 *	Answer: if a packet caused the RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP.  So we build the reply based only on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
539 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
541 struct tcphdr *th = skb->h.th;
544 #ifdef CONFIG_TCP_MD5SIG
545 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
548 struct ip_reply_arg arg;
549 #ifdef CONFIG_TCP_MD5SIG
550 struct tcp_md5sig_key *key;
	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;
560 /* Swap the send and the receive. */
561 memset(&rep, 0, sizeof(rep));
562 rep.th.dest = th->source;
563 rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}
575 memset(&arg, 0, sizeof arg);
576 arg.iov[0].iov_base = (unsigned char *)&rep;
577 arg.iov[0].iov_len = sizeof(rep.th);
579 #ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
599 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
600 skb->nh.iph->saddr, /*XXX*/
601 sizeof(struct tcphdr), IPPROTO_TCP, 0);
602 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
604 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
606 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
607 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
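/*
 * Worked example (editor's illustration): for an incoming segment with
 * no ACK bit, seq = 1000, a 20-byte header, 100 bytes of payload and
 * FIN set, the RST built above acknowledges
 *
 *	ack_seq = 1000 + syn(0) + fin(1) + (120 - 20) = 1101
 *
 * i.e. every data byte plus the FIN, exactly the sequence number the
 * peer expects to see acknowledged next.
 */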
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside of socket context, is certainly ugly. What can I do?
 */
614 static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
615 struct sk_buff *skb, u32 seq, u32 ack,
618 struct tcphdr *th = skb->h.th;
621 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
622 #ifdef CONFIG_TCP_MD5SIG
623 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
627 struct ip_reply_arg arg;
628 #ifdef CONFIG_TCP_MD5SIG
629 struct tcp_md5sig_key *key;
630 struct tcp_md5sig_key tw_key;
633 memset(&rep.th, 0, sizeof(struct tcphdr));
634 memset(&arg, 0, sizeof arg);
636 arg.iov[0].iov_base = (unsigned char *)&rep;
637 arg.iov[0].iov_len = sizeof(rep.th);
639 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
640 (TCPOPT_TIMESTAMP << 8) |
642 rep.opt[1] = htonl(tcp_time_stamp);
643 rep.opt[2] = htonl(ts);
644 arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
647 /* Swap the send and the receive. */
648 rep.th.dest = th->source;
649 rep.th.source = th->dest;
650 rep.th.doff = arg.iov[0].iov_len / 4;
651 rep.th.seq = htonl(seq);
652 rep.th.ack_seq = htonl(ack);
654 rep.th.window = htons(win);
656 #ifdef CONFIG_TCP_MD5SIG
	/*
	 * The SKB holds an incoming packet, but may not have a valid ->sk
	 * pointer. This is especially the case when we're dealing with a
	 * TIME_WAIT ack, because the sk structure is long gone, and only
	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
	 * structure, and we use it in preference.  I believe that (twsk ||
	 * skb->sk) holds true, but we program defensively.
	 */
665 if (!twsk && skb->sk) {
666 key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
	} else if (twsk && twsk->tw_md5_keylen) {
		tw_key.key = twsk->tw_md5_key;
		tw_key.keylen = twsk->tw_md5_keylen;
		key = &tw_key;
	} else
		key = NULL;

	if (key) {
		int offset = (ts) ? 3 : 0;
		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
682 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
683 rep.th.doff = arg.iov[0].iov_len/4;
		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
694 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
695 skb->nh.iph->saddr, /*XXX*/
696 arg.iov[0].iov_len, IPPROTO_TCP, 0);
697 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
699 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
701 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
704 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
706 struct inet_timewait_sock *tw = inet_twsk(sk);
707 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
709 tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
710 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
715 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
717 tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
718 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
727 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
728 struct dst_entry *dst)
730 const struct inet_request_sock *ireq = inet_rsk(req);
732 struct sk_buff * skb;
734 /* First, grab a route. */
735 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
738 skb = tcp_make_synack(sk, dst, req);
741 struct tcphdr *th = skb->h.th;
743 th->check = tcp_v4_check(th, skb->len,
746 csum_partial((char *)th, skb->len,
749 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
752 err = net_xmit_eval(err);
761 * IPv4 request_sock destructor.
763 static void tcp_v4_reqsk_destructor(struct request_sock *req)
765 kfree(inet_rsk(req)->opt);
768 #ifdef CONFIG_SYN_COOKIES
769 static void syn_flood_warning(struct sk_buff *skb)
771 static unsigned long warntime;
773 if (time_after(jiffies, (warntime + HZ * 60))) {
776 "possible SYN flooding on port %d. Sending cookies.\n",
777 ntohs(skb->h.th->dest));
783 * Save and compile IPv4 options into the request_sock if needed.
785 static struct ip_options *tcp_v4_save_options(struct sock *sk,
788 struct ip_options *opt = &(IPCB(skb)->opt);
789 struct ip_options *dopt = NULL;
791 if (opt && opt->optlen) {
792 int opt_size = optlength(opt);
793 dopt = kmalloc(opt_size, GFP_ATOMIC);
795 if (ip_options_echo(dopt, skb)) {
804 #ifdef CONFIG_TCP_MD5SIG
806 * RFC2385 MD5 checksumming requires a mapping of
807 * IP address->MD5 Key.
808 * We need to maintain these in the sk structure.
811 /* Find the Key structure for an address. */
812 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
814 struct tcp_sock *tp = tcp_sk(sk);
817 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
819 for (i = 0; i < tp->md5sig_info->entries4; i++) {
820 if (tp->md5sig_info->keys4[i].addr == addr)
821 return (struct tcp_md5sig_key *)&tp->md5sig_info->keys4[i];
826 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
827 struct sock *addr_sk)
829 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
832 EXPORT_SYMBOL(tcp_v4_md5_lookup);
834 struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
835 struct request_sock *req)
837 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
840 /* This can be called on a newly created socket, from other files */
841 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
842 u8 *newkey, u8 newkeylen)
844 /* Add Key to the list */
845 struct tcp4_md5sig_key *key;
846 struct tcp_sock *tp = tcp_sk(sk);
847 struct tcp4_md5sig_key *keys;
849 key = (struct tcp4_md5sig_key *) tcp_v4_md5_do_lookup(sk, addr);
851 /* Pre-existing entry - just update that one. */
854 key->keylen = newkeylen;
856 if (!tp->md5sig_info) {
857 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
858 if (!tp->md5sig_info) {
863 if (tcp_alloc_md5sig_pool() == NULL) {
867 if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
868 keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
869 (tp->md5sig_info->entries4 + 1)), GFP_ATOMIC);
872 tcp_free_md5sig_pool();
876 if (tp->md5sig_info->entries4)
877 memcpy(keys, tp->md5sig_info->keys4,
878 (sizeof (struct tcp4_md5sig_key) *
879 tp->md5sig_info->entries4));
881 /* Free old key list, and reference new one */
882 if (tp->md5sig_info->keys4)
883 kfree(tp->md5sig_info->keys4);
884 tp->md5sig_info->keys4 = keys;
885 tp->md5sig_info->alloced4++;
887 tp->md5sig_info->entries4++;
888 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr = addr;
889 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key = newkey;
890 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
895 EXPORT_SYMBOL(tcp_v4_md5_do_add);
897 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
898 u8 *newkey, u8 newkeylen)
900 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
904 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
906 struct tcp_sock *tp = tcp_sk(sk);
909 for (i = 0; i < tp->md5sig_info->entries4; i++) {
910 if (tp->md5sig_info->keys4[i].addr == addr) {
912 kfree(tp->md5sig_info->keys4[i].key);
913 tp->md5sig_info->entries4--;
			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memcpy(&tp->md5sig_info->keys4[i],
				       &tp->md5sig_info->keys4[i + 1],
				       (tp->md5sig_info->entries4 - i) *
				       sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}
933 EXPORT_SYMBOL(tcp_v4_md5_do_del);
935 static void tcp_v4_clear_md5_list (struct sock *sk)
937 struct tcp_sock *tp = tcp_sk(sk);
	/* Free each key, then the key set,
	 * the crypto element, and then decrement our
	 * hold on the last-resort crypto.
	 */
943 if (tp->md5sig_info->entries4) {
945 for (i = 0; i < tp->md5sig_info->entries4; i++)
946 kfree(tp->md5sig_info->keys4[i].key);
947 tp->md5sig_info->entries4 = 0;
948 tcp_free_md5sig_pool();
950 if (tp->md5sig_info->keys4) {
951 kfree(tp->md5sig_info->keys4);
952 tp->md5sig_info->keys4 = NULL;
953 tp->md5sig_info->alloced4 = 0;
957 static int tcp_v4_parse_md5_keys (struct sock *sk, char __user *optval,
960 struct tcp_md5sig cmd;
961 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
964 if (optlen < sizeof(cmd))
967 if (copy_from_user (&cmd, optval, sizeof(cmd)))
970 if (sin->sin_family != AF_INET)
973 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
974 if (!tcp_sk(sk)->md5sig_info)
976 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
979 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
982 if (!tcp_sk(sk)->md5sig_info) {
983 struct tcp_sock *tp = tcp_sk(sk);
984 struct tcp_md5sig_info *p;
986 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
994 newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
997 memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
998 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}
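/*
 * Illustrative userspace usage (editor's addition, not part of this
 * file): the setsockopt() path above is reached like this:
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that peer, matching the
 * tcp_v4_md5_do_del() branch above.
 */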
1002 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1003 __be32 saddr, __be32 daddr,
1004 struct tcphdr *th, int protocol,
1007 struct scatterlist sg[4];
1010 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1014 struct tcp_md5sig_pool *hp;
1015 struct tcp4_pseudohdr *bp;
1016 struct hash_desc *desc;
1018 unsigned int nbytes = 0;
1021 * Okay, so RFC2385 is turned on for this connection,
1022 * so we need to generate the MD5 hash for the packet now.
1025 hp = tcp_get_md5sig_pool();
1027 goto clear_hash_noput;
1029 bp = &hp->md5_blk.ip4;
1030 desc = &hp->md5_desc;
	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
1040 bp->protocol = protocol;
1041 bp->len = htons(tcplen);
1042 sg_set_buf(&sg[block++], bp, sizeof(*bp));
1043 nbytes += sizeof(*bp);
1045 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1046 printk("Calcuating hash for: ");
1047 for (i = 0; i < sizeof (*bp); i++)
1048 printk ("%02x ", (unsigned int)((unsigned char *)bp)[i]);
	/* 2. the TCP header, excluding options, and assuming a
	 * checksum of zero
	 */
1055 old_checksum = th->check;
1057 sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1058 nbytes += sizeof(struct tcphdr);
1059 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1060 for (i = 0; i < sizeof (struct tcphdr); i++)
1061 printk (" %02x", (unsigned int)((unsigned char *)th)[i]);
1063 /* 3. the TCP segment data (if any) */
1064 data_len = tcplen - (th->doff << 2);
1066 unsigned char *data = (unsigned char *)th + (th->doff << 2);
1067 sg_set_buf(&sg[block++], data, data_len);
	/* 4. an independently-specified key or password, known to both
	 * TCPs and presumably connection-specific
	 */
1074 sg_set_buf(&sg[block++], key->key, key->keylen);
1075 nbytes += key->keylen;
1077 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1078 printk (" and password: ");
1079 for (i = 0; i < key->keylen; i++)
1080 printk ("%02x ", (unsigned int)key->key[i]);
1083 /* Now store the Hash into the packet */
1084 err = crypto_hash_init(desc);
1087 err = crypto_hash_update(desc, sg, nbytes);
1090 err = crypto_hash_final(desc, md5_hash);
1094 /* Reset header, and free up the crypto */
1095 tcp_put_md5sig_pool();
1096 th->check = old_checksum;
1099 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1101 for (i = 0; i < 16; i++)
1102 printk (" %02x", (unsigned int)(((u8*)md5_hash)[i]));
1107 tcp_put_md5sig_pool();
1109 memset(md5_hash, 0, 16);
1113 int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1115 struct dst_entry *dst,
1116 struct request_sock *req,
1117 struct tcphdr *th, int protocol,
1120 __be32 saddr, daddr;
1123 saddr = inet_sk(sk)->saddr;
1124 daddr = inet_sk(sk)->daddr;
1126 struct rtable *rt = (struct rtable *)dst;
1131 return tcp_v4_do_calc_md5_hash(md5_hash, key,
1133 th, protocol, tcplen);
1136 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
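/*
 * Illustrative sketch (editor's addition; userspace, assuming OpenSSL's
 * <openssl/md5.h> is available): the RFC 2385 digest computed above
 * covers, in order, the pseudo-header, the TCP header with a zeroed
 * checksum, the payload, and the key:
 *
 *	MD5_CTX ctx;
 *	unsigned char digest[16];
 *
 *	MD5_Init(&ctx);
 *	MD5_Update(&ctx, &pseudo_hdr, sizeof(pseudo_hdr));
 *	th->check = 0;
 *	MD5_Update(&ctx, th, sizeof(struct tcphdr));
 *	MD5_Update(&ctx, payload, payload_len);
 *	MD5_Update(&ctx, key, keylen);
 *	MD5_Final(digest, &ctx);
 *
 * (pseudo_hdr, payload and key are hypothetical local variables.)
 * Any disagreement in these inputs makes the 16-byte digests differ
 * and the segment is dropped.
 */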
1138 static int tcp_v4_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
1141 * This gets called for each TCP segment that arrives
1142 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
1148 __u8 *hash_location = NULL;
1149 struct tcp_md5sig_key *hash_expected;
1150 struct iphdr *iph = skb->nh.iph;
1151 struct tcphdr *th = skb->h.th;
1152 int length = (th->doff << 2) - sizeof (struct tcphdr);
1155 unsigned char newhash[16];
1157 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1160 * If the TCP option length is less than the TCP_MD5SIG
1161 * option length, then we can shortcut
1163 if (length < TCPOLEN_MD5SIG) {
1170 /* Okay, we can't shortcut - we have to grub through the options */
1171 ptr = (unsigned char *)(th + 1);
1172 while (length > 0) {
1173 int opcode = *ptr++;
1186 if (opsize > length)
1189 if (opcode == TCPOPT_MD5SIG) {
1190 hash_location = ptr;
1198 /* We've parsed the options - do we have a hash? */
1199 if (!hash_expected && !hash_location)
	if (hash_expected && !hash_location) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		}
		return 1;
	}
1212 if (!hash_expected && hash_location) {
1213 if (net_ratelimit()) {
1214 printk(KERN_INFO "MD5 Hash NOT expected but found "
1215 "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1216 NIPQUAD (iph->saddr), ntohs(th->source),
1217 NIPQUAD (iph->daddr), ntohs(th->dest));
	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the MD5 hash for the packet
	 * and compare it with the one that arrived.
	 */
1225 genhash = tcp_v4_do_calc_md5_hash(newhash,
1227 iph->saddr, iph->daddr,
1228 th, sk->sk_protocol,
1231 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1232 if (net_ratelimit()) {
1233 printk(KERN_INFO "MD5 Hash failed for "
1234 "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1235 NIPQUAD (iph->saddr), ntohs(th->source),
1236 NIPQUAD (iph->daddr), ntohs(th->dest),
1237 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1238 #ifdef CONFIG_TCP_MD5SIG_DEBUG
1241 printk("Received: ");
1242 for (i = 0; i < 16; i++)
1243 printk("%02x ", 0xff & (int)hash_location[i]);
1245 printk("Calculated: ");
1246 for (i = 0; i < 16; i++)
1247 printk("%02x ", 0xff & (int)newhash[i]);
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
1262 .rtx_syn_ack = tcp_v4_send_synack,
1263 .send_ack = tcp_v4_reqsk_send_ack,
1264 .destructor = tcp_v4_reqsk_destructor,
1265 .send_reset = tcp_v4_send_reset,
1268 struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1269 #ifdef CONFIG_TCP_MD5SIG
1270 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1274 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1275 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1276 .twsk_unique = tcp_twsk_unique,
1277 .twsk_destructor= tcp_twsk_destructor,
1280 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1282 struct inet_request_sock *ireq;
1283 struct tcp_options_received tmp_opt;
1284 struct request_sock *req;
1285 __be32 saddr = skb->nh.iph->saddr;
1286 __be32 daddr = skb->nh.iph->daddr;
1287 __u32 isn = TCP_SKB_CB(skb)->when;
1288 struct dst_entry *dst = NULL;
1289 #ifdef CONFIG_SYN_COOKIES
1290 int want_cookie = 0;
1292 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
	/* Never answer SYNs sent to broadcast or multicast addresses. */
1296 if (((struct rtable *)skb->dst)->rt_flags &
1297 (RTCF_BROADCAST | RTCF_MULTICAST))
	/* TW buckets are converted to open requests without
	 * limitations; they conserve resources and the peer is
	 * evidently a real one.
	 */
1304 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1305 #ifdef CONFIG_SYN_COOKIES
1306 if (sysctl_tcp_syncookies) {
	/* Accept backlog is full. If we have already queued enough
	 * warm entries in the syn queue, drop this request. It is better
	 * than clogging the syn queue with openreqs with exponentially
	 * increasing timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;
1321 req = reqsk_alloc(&tcp_request_sock_ops);
1325 #ifdef CONFIG_TCP_MD5SIG
1326 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1329 tcp_clear_options(&tmp_opt);
1330 tmp_opt.mss_clamp = 536;
1331 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
1333 tcp_parse_options(skb, &tmp_opt, 0);
1336 tcp_clear_options(&tmp_opt);
1337 tmp_opt.saw_tstamp = 0;
1340 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on a web server,
		 * which contains information interesting only for Windows
		 * users) do not send their stamp in the SYN. It is an easy
		 * case: we simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
1349 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1351 tcp_openreq_init(req, &tmp_opt, skb);
1353 if (security_inet_conn_request(sk, skb, req))
1356 ireq = inet_rsk(req);
1357 ireq->loc_addr = daddr;
1358 ireq->rmt_addr = saddr;
1359 ireq->opt = tcp_v4_save_options(sk, skb);
1361 TCP_ECN_create_request(req, skb->h.th);
1364 #ifdef CONFIG_SYN_COOKIES
1365 syn_flood_warning(skb);
1367 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1369 struct inet_peer *peer = NULL;
1371 /* VJ's idea. We save last timestamp seen
1372 * from the destination in peer table, when entering
1373 * state TIME-WAIT, and check against it before
1374 * accepting new connection request.
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so all the necessary checks
		 * are made in the function processing the timewait state.
		 */
1380 if (tmp_opt.saw_tstamp &&
1381 tcp_death_row.sysctl_tw_recycle &&
1382 (dst = inet_csk_route_req(sk, req)) != NULL &&
1383 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1384 peer->v4daddr == saddr) {
1385 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1386 (s32)(peer->tcp_ts - req->ts_recent) >
1388 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1393 /* Kill the following clause, if you dislike this way. */
1394 else if (!sysctl_tcp_syncookies &&
1395 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1396 (sysctl_max_syn_backlog >> 2)) &&
1397 (!peer || !peer->tcp_ts_stamp) &&
1398 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive.
			 * It means that we continue to communicate
			 * with destinations already remembered at
			 * the moment of the synflood.
			 */
1406 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1407 "request from %u.%u.%u.%u/%u\n",
1409 ntohs(skb->h.th->source));
1414 isn = tcp_v4_init_sequence(skb);
1416 tcp_rsk(req)->snt_isn = isn;
	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
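/*
 * Editor's note (simplified illustration, not the exact kernel
 * algorithm): a SYN cookie folds the connection 4-tuple and a coarse
 * timestamp into the ISN, conceptually
 *
 *	isn = hash(saddr, daddr, sport, dport, secret, t) + mss_index;
 *
 * so the later ACK can be validated statelessly: recompute the hash
 * for recent values of t and check that (ack_seq - 1) matches.
 */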
/*
 * The three way handshake has completed - we got a valid ACK -
 * now create the new socket.
 */
1439 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1440 struct request_sock *req,
1441 struct dst_entry *dst)
1443 struct inet_request_sock *ireq;
1444 struct inet_sock *newinet;
1445 struct tcp_sock *newtp;
1447 #ifdef CONFIG_TCP_MD5SIG
1448 struct tcp_md5sig_key *key;
1451 if (sk_acceptq_is_full(sk))
1454 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1457 newsk = tcp_create_openreq_child(sk, req, skb);
1461 newsk->sk_gso_type = SKB_GSO_TCPV4;
1462 sk_setup_caps(newsk, dst);
1464 newtp = tcp_sk(newsk);
1465 newinet = inet_sk(newsk);
1466 ireq = inet_rsk(req);
1467 newinet->daddr = ireq->rmt_addr;
1468 newinet->rcv_saddr = ireq->loc_addr;
1469 newinet->saddr = ireq->loc_addr;
1470 newinet->opt = ireq->opt;
1472 newinet->mc_index = inet_iif(skb);
1473 newinet->mc_ttl = skb->nh.iph->ttl;
1474 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1476 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1477 newinet->id = newtp->write_seq ^ jiffies;
1479 tcp_mtup_init(newsk);
1480 tcp_sync_mss(newsk, dst_mtu(dst));
1481 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1482 tcp_initialize_rcv_mss(newsk);
1484 #ifdef CONFIG_TCP_MD5SIG
1485 /* Copy over the MD5 key from the original socket */
1486 if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			memcpy(newkey, key->key, key->keylen);
		tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
				  newkey, key->keylen);
	}
#endif
1502 __inet_hash(&tcp_hashinfo, newsk, 0);
1503 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1508 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1510 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1515 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1517 struct tcphdr *th = skb->h.th;
1518 struct iphdr *iph = skb->nh.iph;
1520 struct request_sock **prev;
1521 /* Find possible connection requests. */
1522 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1523 iph->saddr, iph->daddr);
1525 return tcp_check_req(sk, skb, req, prev);
1527 nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
1528 th->source, skb->nh.iph->daddr,
1529 th->dest, inet_iif(skb));
1532 if (nsk->sk_state != TCP_TIME_WAIT) {
1536 inet_twsk_put(inet_twsk(nsk));
1540 #ifdef CONFIG_SYN_COOKIES
1541 if (!th->rst && !th->syn && th->ack)
1542 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1547 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1549 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1550 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1551 skb->nh.iph->daddr, skb->csum)) {
1552 skb->ip_summed = CHECKSUM_UNNECESSARY;
1557 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
1558 skb->len, IPPROTO_TCP, 0);
1560 if (skb->len <= 76) {
1561 return __skb_checksum_complete(skb);
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1575 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1578 #ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif
1589 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1590 TCP_CHECK_TIMER(sk);
1591 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
1595 TCP_CHECK_TIMER(sk);
1599 if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1602 if (sk->sk_state == TCP_LISTEN) {
1603 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1608 if (tcp_child_process(sk, nsk, skb)) {
1616 TCP_CHECK_TIMER(sk);
1617 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
1621 TCP_CHECK_TIMER(sk);
1625 tcp_v4_send_reset(rsk, skb);
1628 /* Be careful here. If this function gets more complicated and
1629 * gcc suffers from register pressure on the x86, sk (in %ebx)
1630 * might be destroyed here. This current version compiles correctly,
1631 * but you have been warned.
1636 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1644 int tcp_v4_rcv(struct sk_buff *skb)
1650 if (skb->pkt_type != PACKET_HOST)
1653 /* Count it even if it's bad */
1654 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1656 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1661 if (th->doff < sizeof(struct tcphdr) / 4)
1663 if (!pskb_may_pull(skb, th->doff * 4))
1666 /* An explanation is required here, I think.
1667 * Packet length and doff are validated by header prediction,
1668 * provided case of th->doff==0 is eliminated.
1669 * So, we defer the checks. */
1670 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1671 tcp_v4_checksum_init(skb)))
1675 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1676 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1677 skb->len - th->doff * 4);
1678 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1679 TCP_SKB_CB(skb)->when = 0;
1680 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
1681 TCP_SKB_CB(skb)->sacked = 0;
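	/*
	 * Editor's illustration: SYN and FIN each occupy one unit of
	 * sequence space, so for a pure SYN (seq = 4999, doff = 5,
	 * skb->len = 20, no payload) the end_seq line above yields
	 *
	 *	end_seq = 4999 + 1 + 0 + (20 - 20) = 5000
	 *
	 * which is what later state processing compares against rcv_nxt.
	 */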
1683 sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1684 skb->nh.iph->daddr, th->dest,
1691 if (sk->sk_state == TCP_TIME_WAIT)
1694 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1695 goto discard_and_relse;
1698 if (sk_filter(sk, skb))
1699 goto discard_and_relse;
1703 bh_lock_sock_nested(sk);
1705 if (!sock_owned_by_user(sk)) {
1706 #ifdef CONFIG_NET_DMA
1707 struct tcp_sock *tp = tcp_sk(sk);
1708 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1709 tp->ucopy.dma_chan = get_softnet_dma();
1710 if (tp->ucopy.dma_chan)
1711 ret = tcp_v4_do_rcv(sk, skb);
1715 if (!tcp_prequeue(sk, skb))
1716 ret = tcp_v4_do_rcv(sk, skb);
1719 sk_add_backlog(sk, skb);
1727 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1730 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1732 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1734 tcp_v4_send_reset(NULL, skb);
1738 /* Discard frame. */
1747 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1748 inet_twsk_put(inet_twsk(sk));
1752 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1753 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1754 inet_twsk_put(inet_twsk(sk));
1757 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1759 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1764 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1765 inet_twsk_put(inet_twsk(sk));
1769 /* Fall through to ACK */
1772 tcp_v4_timewait_ack(sk, skb);
1776 case TCP_TW_SUCCESS:;
/* VJ's idea. Save the last timestamp seen from this destination
 * and hold it at least for the normal timewait interval, to use for
 * duplicate segment detection in subsequent connections, before they
 * enter synchronized state.
 */
1787 int tcp_v4_remember_stamp(struct sock *sk)
1789 struct inet_sock *inet = inet_sk(sk);
1790 struct tcp_sock *tp = tcp_sk(sk);
1791 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1792 struct inet_peer *peer = NULL;
1795 if (!rt || rt->rt_dst != inet->daddr) {
1796 peer = inet_getpeer(inet->daddr, 1);
1800 rt_bind_peer(rt, 1);
1805 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1806 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1807 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1808 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1809 peer->tcp_ts = tp->rx_opt.ts_recent;
1819 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1821 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1824 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1826 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1827 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1828 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1829 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1830 peer->tcp_ts = tcptw->tw_ts_recent;
1839 struct inet_connection_sock_af_ops ipv4_specific = {
1840 .queue_xmit = ip_queue_xmit,
1841 .send_check = tcp_v4_send_check,
1842 .rebuild_header = inet_sk_rebuild_header,
1843 .conn_request = tcp_v4_conn_request,
1844 .syn_recv_sock = tcp_v4_syn_recv_sock,
1845 .remember_stamp = tcp_v4_remember_stamp,
1846 .net_header_len = sizeof(struct iphdr),
1847 .setsockopt = ip_setsockopt,
1848 .getsockopt = ip_getsockopt,
1849 .addr2sockaddr = inet_csk_addr2sockaddr,
1850 .sockaddr_len = sizeof(struct sockaddr_in),
1851 #ifdef CONFIG_COMPAT
1852 .compat_setsockopt = compat_ip_setsockopt,
1853 .compat_getsockopt = compat_ip_getsockopt,
1857 struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1858 #ifdef CONFIG_TCP_MD5SIG
1859 .md5_lookup = tcp_v4_md5_lookup,
1860 .calc_md5_hash = tcp_v4_calc_md5_hash,
1861 .md5_add = tcp_v4_md5_add_func,
1862 .md5_parse = tcp_v4_parse_md5_keys,
/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so need not be done here.
 */
1869 static int tcp_v4_init_sock(struct sock *sk)
1871 struct inet_connection_sock *icsk = inet_csk(sk);
1872 struct tcp_sock *tp = tcp_sk(sk);
1874 skb_queue_head_init(&tp->out_of_order_queue);
1875 tcp_init_xmit_timers(sk);
1876 tcp_prequeue_init(tp);
1878 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1879 tp->mdev = TCP_TIMEOUT_INIT;
1881 /* So many TCP implementations out there (incorrectly) count the
1882 * initial SYN frame in their delayed-ACK and congestion control
1883 * algorithms that we must have the following bandaid to talk
1884 * efficiently to them. -DaveM
1888 /* See draft-stevens-tcpca-spec-01 for discussion of the
1889 * initialization of these values.
1891 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
1892 tp->snd_cwnd_clamp = ~0;
1893 tp->mss_cache = 536;
1895 tp->reordering = sysctl_tcp_reordering;
1896 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1898 sk->sk_state = TCP_CLOSE;
1900 sk->sk_write_space = sk_stream_write_space;
1901 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1903 icsk->icsk_af_ops = &ipv4_specific;
1904 icsk->icsk_sync_mss = tcp_sync_mss;
1905 #ifdef CONFIG_TCP_MD5SIG
1906 tp->af_specific = &tcp_sock_ipv4_specific;
1909 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1910 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1912 atomic_inc(&tcp_sockets_allocated);
1917 int tcp_v4_destroy_sock(struct sock *sk)
1919 struct tcp_sock *tp = tcp_sk(sk);
1921 tcp_clear_xmit_timers(sk);
1923 tcp_cleanup_congestion_control(sk);
	/* Clean up the write buffer. */
1926 sk_stream_writequeue_purge(sk);
1928 /* Cleans up our, hopefully empty, out_of_order_queue. */
1929 __skb_queue_purge(&tp->out_of_order_queue);
1931 #ifdef CONFIG_TCP_MD5SIG
1932 /* Clean up the MD5 key list, if any */
1933 if (tp->md5sig_info) {
1934 tcp_v4_clear_md5_list(sk);
1935 kfree(tp->md5sig_info);
1936 tp->md5sig_info = NULL;
1940 #ifdef CONFIG_NET_DMA
1941 /* Cleans up our sk_async_wait_queue */
1942 __skb_queue_purge(&sk->sk_async_wait_queue);
	/* Clean the prequeue; it must really be empty. */
1946 __skb_queue_purge(&tp->ucopy.prequeue);
1948 /* Clean up a referenced TCP bind bucket. */
1949 if (inet_csk(sk)->icsk_bind_hash)
1950 inet_put_port(&tcp_hashinfo, sk);
1953 * If sendmsg cached page exists, toss it.
1955 if (sk->sk_sndmsg_page) {
1956 __free_page(sk->sk_sndmsg_page);
1957 sk->sk_sndmsg_page = NULL;
1960 atomic_dec(&tcp_sockets_allocated);
1965 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1967 #ifdef CONFIG_PROC_FS
1968 /* Proc filesystem TCP sock list dumping. */
1970 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1972 return hlist_empty(head) ? NULL :
1973 list_entry(head->first, struct inet_timewait_sock, tw_node);
1976 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1978 return tw->tw_node.next ?
1979 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1982 static void *listening_get_next(struct seq_file *seq, void *cur)
1984 struct inet_connection_sock *icsk;
1985 struct hlist_node *node;
1986 struct sock *sk = cur;
1987 struct tcp_iter_state* st = seq->private;
1991 sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1997 if (st->state == TCP_SEQ_STATE_OPENREQ) {
1998 struct request_sock *req = cur;
2000 icsk = inet_csk(st->syn_wait_sk);
2004 if (req->rsk_ops->family == st->family) {
2010 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2013 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2015 sk = sk_next(st->syn_wait_sk);
2016 st->state = TCP_SEQ_STATE_LISTENING;
2017 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2019 icsk = inet_csk(sk);
2020 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2023 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2027 sk_for_each_from(sk, node) {
2028 if (sk->sk_family == st->family) {
2032 icsk = inet_csk(sk);
2033 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2034 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2036 st->uid = sock_i_uid(sk);
2037 st->syn_wait_sk = sk;
2038 st->state = TCP_SEQ_STATE_OPENREQ;
2042 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2044 if (++st->bucket < INET_LHTABLE_SIZE) {
2045 sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2053 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2055 void *rc = listening_get_next(seq, NULL);
2057 while (rc && *pos) {
2058 rc = listening_get_next(seq, rc);
2064 static void *established_get_first(struct seq_file *seq)
2066 struct tcp_iter_state* st = seq->private;
2069 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2071 struct hlist_node *node;
2072 struct inet_timewait_sock *tw;
2074 /* We can reschedule _before_ having picked the target: */
2075 cond_resched_softirq();
2077 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2078 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2079 if (sk->sk_family != st->family) {
2085 st->state = TCP_SEQ_STATE_TIME_WAIT;
2086 inet_twsk_for_each(tw, node,
2087 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
2088 if (tw->tw_family != st->family) {
2094 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2095 st->state = TCP_SEQ_STATE_ESTABLISHED;
2101 static void *established_get_next(struct seq_file *seq, void *cur)
2103 struct sock *sk = cur;
2104 struct inet_timewait_sock *tw;
2105 struct hlist_node *node;
2106 struct tcp_iter_state* st = seq->private;
2110 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2114 while (tw && tw->tw_family != st->family) {
2121 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2122 st->state = TCP_SEQ_STATE_ESTABLISHED;
2124 /* We can reschedule between buckets: */
2125 cond_resched_softirq();
2127 if (++st->bucket < tcp_hashinfo.ehash_size) {
2128 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2129 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2137 sk_for_each_from(sk, node) {
2138 if (sk->sk_family == st->family)
2142 st->state = TCP_SEQ_STATE_TIME_WAIT;
2143 tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
2151 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2153 void *rc = established_get_first(seq);
2156 rc = established_get_next(seq, rc);
2162 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2165 struct tcp_iter_state* st = seq->private;
2167 inet_listen_lock(&tcp_hashinfo);
2168 st->state = TCP_SEQ_STATE_LISTENING;
2169 rc = listening_get_idx(seq, &pos);
2172 inet_listen_unlock(&tcp_hashinfo);
2174 st->state = TCP_SEQ_STATE_ESTABLISHED;
2175 rc = established_get_idx(seq, pos);
2181 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2183 struct tcp_iter_state* st = seq->private;
2184 st->state = TCP_SEQ_STATE_LISTENING;
2186 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2189 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2192 struct tcp_iter_state* st;
2194 if (v == SEQ_START_TOKEN) {
2195 rc = tcp_get_idx(seq, 0);
2200 switch (st->state) {
2201 case TCP_SEQ_STATE_OPENREQ:
2202 case TCP_SEQ_STATE_LISTENING:
2203 rc = listening_get_next(seq, v);
2205 inet_listen_unlock(&tcp_hashinfo);
2207 st->state = TCP_SEQ_STATE_ESTABLISHED;
2208 rc = established_get_first(seq);
2211 case TCP_SEQ_STATE_ESTABLISHED:
2212 case TCP_SEQ_STATE_TIME_WAIT:
2213 rc = established_get_next(seq, v);
2221 static void tcp_seq_stop(struct seq_file *seq, void *v)
2223 struct tcp_iter_state* st = seq->private;
2225 switch (st->state) {
2226 case TCP_SEQ_STATE_OPENREQ:
2228 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2229 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2231 case TCP_SEQ_STATE_LISTENING:
2232 if (v != SEQ_START_TOKEN)
2233 inet_listen_unlock(&tcp_hashinfo);
2235 case TCP_SEQ_STATE_TIME_WAIT:
2236 case TCP_SEQ_STATE_ESTABLISHED:
2238 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2244 static int tcp_seq_open(struct inode *inode, struct file *file)
2246 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2247 struct seq_file *seq;
2248 struct tcp_iter_state *s;
2251 if (unlikely(afinfo == NULL))
2254 s = kzalloc(sizeof(*s), GFP_KERNEL);
2257 s->family = afinfo->family;
2258 s->seq_ops.start = tcp_seq_start;
2259 s->seq_ops.next = tcp_seq_next;
2260 s->seq_ops.show = afinfo->seq_show;
2261 s->seq_ops.stop = tcp_seq_stop;
2263 rc = seq_open(file, &s->seq_ops);
2266 seq = file->private_data;
2275 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2278 struct proc_dir_entry *p;
2282 afinfo->seq_fops->owner = afinfo->owner;
2283 afinfo->seq_fops->open = tcp_seq_open;
2284 afinfo->seq_fops->read = seq_read;
2285 afinfo->seq_fops->llseek = seq_lseek;
2286 afinfo->seq_fops->release = seq_release_private;
2288 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2296 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2300 proc_net_remove(afinfo->name);
2301 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2304 static void get_openreq4(struct sock *sk, struct request_sock *req,
2305 char *tmpbuf, int i, int uid)
2307 const struct inet_request_sock *ireq = inet_rsk(req);
2308 int ttd = req->expires - jiffies;
2310 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2311 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2314 ntohs(inet_sk(sk)->sport),
2316 ntohs(ireq->rmt_port),
2318 0, 0, /* could print option size, but that is af dependent. */
2319 1, /* timers active (only the expire timer) */
2320 jiffies_to_clock_t(ttd),
2323 0, /* non standard timer */
2324 0, /* open_requests have no inode */
2325 atomic_read(&sk->sk_refcnt),
2329 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2332 unsigned long timer_expires;
2333 struct tcp_sock *tp = tcp_sk(sp);
2334 const struct inet_connection_sock *icsk = inet_csk(sp);
2335 struct inet_sock *inet = inet_sk(sp);
2336 __be32 dest = inet->daddr;
2337 __be32 src = inet->rcv_saddr;
2338 __u16 destp = ntohs(inet->dport);
2339 __u16 srcp = ntohs(inet->sport);
2341 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2343 timer_expires = icsk->icsk_timeout;
2344 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2346 timer_expires = icsk->icsk_timeout;
2347 } else if (timer_pending(&sp->sk_timer)) {
2349 timer_expires = sp->sk_timer.expires;
2352 timer_expires = jiffies;
2355 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2356 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2357 i, src, srcp, dest, destp, sp->sk_state,
2358 tp->write_seq - tp->snd_una,
2359 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
2361 jiffies_to_clock_t(timer_expires - jiffies),
2362 icsk->icsk_retransmits,
2364 icsk->icsk_probes_out,
2366 atomic_read(&sp->sk_refcnt), sp,
2369 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2371 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2374 static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
2378 int ttd = tw->tw_ttd - jiffies;
2383 dest = tw->tw_daddr;
2384 src = tw->tw_rcv_saddr;
2385 destp = ntohs(tw->tw_dport);
2386 srcp = ntohs(tw->tw_sport);
2388 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2389 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2390 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2391 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2392 atomic_read(&tw->tw_refcnt), tw);
2397 static int tcp4_seq_show(struct seq_file *seq, void *v)
2399 struct tcp_iter_state* st;
2400 char tmpbuf[TMPSZ + 1];
2402 if (v == SEQ_START_TOKEN) {
2403 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2404 " sl local_address rem_address st tx_queue "
2405 "rx_queue tr tm->when retrnsmt uid timeout "
2411 switch (st->state) {
2412 case TCP_SEQ_STATE_LISTENING:
2413 case TCP_SEQ_STATE_ESTABLISHED:
2414 get_tcp4_sock(v, tmpbuf, st->num);
2416 case TCP_SEQ_STATE_OPENREQ:
2417 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2419 case TCP_SEQ_STATE_TIME_WAIT:
2420 get_timewait4_sock(v, tmpbuf, st->num);
2423 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2428 static struct file_operations tcp4_seq_fops;
2429 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2430 .owner = THIS_MODULE,
2433 .seq_show = tcp4_seq_show,
2434 .seq_fops = &tcp4_seq_fops,
2437 int __init tcp4_proc_init(void)
2439 return tcp_proc_register(&tcp4_seq_afinfo);
2442 void tcp4_proc_exit(void)
2444 tcp_proc_unregister(&tcp4_seq_afinfo);
2446 #endif /* CONFIG_PROC_FS */
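/*
 * Editor's illustration (format only; the values are made up): a line
 * produced by get_tcp4_sock() in /proc/net/tcp looks like
 *
 *   2: 0100007F:0016 0100007F:8E24 01 00000000:00000000 02:000A7214 ...
 *
 * i.e. slot, hex local address:port, hex remote address:port, state
 * (01 == TCP_ESTABLISHED), tx/rx queue sizes and timer fields, lining
 * up with the header emitted by tcp4_seq_show() above.
 */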
2448 struct proto tcp_prot = {
2450 .owner = THIS_MODULE,
2452 .connect = tcp_v4_connect,
2453 .disconnect = tcp_disconnect,
2454 .accept = inet_csk_accept,
2456 .init = tcp_v4_init_sock,
2457 .destroy = tcp_v4_destroy_sock,
2458 .shutdown = tcp_shutdown,
2459 .setsockopt = tcp_setsockopt,
2460 .getsockopt = tcp_getsockopt,
2461 .sendmsg = tcp_sendmsg,
2462 .recvmsg = tcp_recvmsg,
2463 .backlog_rcv = tcp_v4_do_rcv,
2464 .hash = tcp_v4_hash,
2465 .unhash = tcp_unhash,
2466 .get_port = tcp_v4_get_port,
2467 .enter_memory_pressure = tcp_enter_memory_pressure,
2468 .sockets_allocated = &tcp_sockets_allocated,
2469 .orphan_count = &tcp_orphan_count,
2470 .memory_allocated = &tcp_memory_allocated,
2471 .memory_pressure = &tcp_memory_pressure,
2472 .sysctl_mem = sysctl_tcp_mem,
2473 .sysctl_wmem = sysctl_tcp_wmem,
2474 .sysctl_rmem = sysctl_tcp_rmem,
2475 .max_header = MAX_TCP_HEADER,
2476 .obj_size = sizeof(struct tcp_sock),
2477 .twsk_prot = &tcp_timewait_sock_ops,
2478 .rsk_prot = &tcp_request_sock_ops,
2479 #ifdef CONFIG_COMPAT
2480 .compat_setsockopt = compat_tcp_setsockopt,
2481 .compat_getsockopt = compat_tcp_getsockopt,
2485 void __init tcp_v4_init(struct net_proto_family *ops)
2487 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
2488 panic("Failed to create the TCP control socket.\n");
2491 EXPORT_SYMBOL(ipv4_specific);
2492 EXPORT_SYMBOL(tcp_hashinfo);
2493 EXPORT_SYMBOL(tcp_prot);
2494 EXPORT_SYMBOL(tcp_unhash);
2495 EXPORT_SYMBOL(tcp_v4_conn_request);
2496 EXPORT_SYMBOL(tcp_v4_connect);
2497 EXPORT_SYMBOL(tcp_v4_do_rcv);
2498 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2499 EXPORT_SYMBOL(tcp_v4_send_check);
2500 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2502 #ifdef CONFIG_PROC_FS
2503 EXPORT_SYMBOL(tcp_proc_register);
2504 EXPORT_SYMBOL(tcp_proc_unregister);
2506 EXPORT_SYMBOL(sysctl_local_port_range);
2507 EXPORT_SYMBOL(sysctl_tcp_low_latency);