/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller	:	New socket lookup architecture.
 *				This code is dedicated to John Dyson.
 *	David S. Miller :	Change semantics of established hash,
 *				half is devoted to TIME_WAIT sockets
 *				and the rest go in the other half.
 *	Andi Kleen :		Add support for syncookies and fixed
 *				some bugs: ip options weren't passed to
 *				the TCP layer, missed a check for an
 *				ACK bit.
 *	Andi Kleen :		Implemented fast path mtu discovery.
 *				Fixed many serious bugs in the
 *				request_sock handling and moved
 *				most of it into the af independent code.
 *				Added tail drop and some other bugfixes.
 *				Added new listen semantics.
 *	Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:	ip_dynaddr bits
 *	Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   unsigned int tcplen);
#endif
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host, but per port pair, and the TW bucket is used
	   as the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);
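
/*
 * Usage sketch (userspace, not part of the kernel build): sysctl_tcp_tw_reuse
 * above is exposed as net.ipv4.tcp_tw_reuse. A minimal way to enable it from
 * C, assuming root privileges, is to write the corresponding procfs file.
 *
 *	#include <stdio.h>
 *
 *	static int enable_tw_reuse(void)
 *	{
 *		FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "w");
 *
 *		if (!f)
 *			return -1;
 *		fputs("1\n", f);	// let connect() reuse TIME-WAIT ports
 *		fclose(f);
 *		return 0;
 *	}
 */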
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying a new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}
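
/*
 * Usage sketch (userspace, not part of the kernel build): tcp_v4_connect()
 * is what ultimately runs when an application calls connect() on an IPv4
 * stream socket; errors such as -EAFNOSUPPORT above surface as errno. The
 * address and port below are illustrative assumptions.
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static int dial(const char *ip, unsigned short port)
 *	{
 *		struct sockaddr_in sin = { .sin_family = AF_INET };
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		sin.sin_port = htons(port);
 *		inet_pton(AF_INET, ip, &sin.sin_addr);
 *		if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
 *			close(fd);	// errno describes the failure
 *			return -1;
 *		}
 *		return fd;
 *	}
 */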
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember the soft error
	 * for the case that this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
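
/*
 * Usage sketch (userspace, not part of the kernel build): the inet->pmtudisc
 * mode consulted above is set per socket with the IP_MTU_DISCOVER option;
 * IP_PMTUDISC_DO forces the DF bit, IP_PMTUDISC_DONT opts the socket out of
 * path MTU discovery.
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	static int force_pmtu_discovery(int fd)
 *	{
 *		int val = IP_PMTUDISC_DO;	// always send with DF set
 *
 *		return setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
 *				  &val, sizeof(val));
 *	}
 */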
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen normally.
			       It can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH
	 * to be considered hard errors (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit sending random
	 * errors ordered by their masters, even these two messages have
	 * finally lost their original sense (even Linux sends invalid
	 * PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
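
/*
 * Usage sketch (userspace, not part of the kernel build): the inet->recverr
 * test above corresponds to the IP_RECVERR socket option; with it enabled,
 * ICMP-derived errors are reported to the application immediately rather
 * than only on timeout.
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	static int enable_recverr(int fd)
 *	{
 *		int on = 1;
 *
 *		return setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
 *	}
 */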
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(len, inet->saddr,
					  inet->daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for a reset.
 *	Answer: if a packet caused the RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP. So we build the reply based only on parameters
 *		that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb->rtable->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
					key,
					ip_hdr(skb)->daddr,
					ip_hdr(skb)->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside of socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_key tw_key;
#endif

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * The SKB holds an incoming packet, but may not have a valid ->sk
	 * pointer. This is especially the case when we're dealing with a
	 * TIME_WAIT ack, because the sk structure is long gone, and only
	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
	 * structure, and we use it in preference.  I believe that (twsk ||
	 * skb->sk) holds true, but we program defensively.
	 */
	if (!twsk && skb->sk) {
		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
	} else if (twsk && twsk->tw_md5_keylen) {
		tw_key.key = twsk->tw_md5_key;
		tw_key.keylen = twsk->tw_md5_keylen;
		key = &tw_key;
	} else
		key = NULL;

	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
					key,
					ip_hdr(skb)->daddr,
					ip_hdr(skb)->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (twsk)
		arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;

	ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}
/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
				struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = tcp_hdr(skb);

		th->check = tcp_v4_check(skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
{
	return __tcp_v4_send_synack(sk, req, NULL);
}
/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}
#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(tcp_hdr(skb)->dest));
	}
}
#endif
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address. */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}
/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);
static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the key array itself,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}
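
/*
 * Usage sketch (userspace, not part of the kernel build): keys reach
 * tcp_v4_parse_md5_keys() through the TCP_MD5SIG socket option. This
 * assumes a libc whose <netinet/tcp.h> exposes struct tcp_md5sig;
 * otherwise it comes from <linux/tcp.h>. Peer address and secret are
 * illustrative assumptions.
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static int set_md5_key(int fd, const char *peer, const char *secret)
 *	{
 *		struct tcp_md5sig md5;
 *		struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *		memset(&md5, 0, sizeof(md5));
 *		sin->sin_family = AF_INET;
 *		inet_pton(AF_INET, peer, &sin->sin_addr);
 *		md5.tcpm_keylen = strlen(secret);	// bounded by TCP_MD5SIG_MAXKEYLEN
 *		memcpy(md5.tcpm_key, secret, md5.tcpm_keylen);
 *		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *				  &md5, sizeof(md5));
 *	}
 */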
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   unsigned int tcplen)
{
	struct scatterlist sg[4];
	__u16 data_len;
	int block = 0;
	__sum16 old_checksum;
	struct tcp_md5sig_pool *hp;
	struct tcp4_pseudohdr *bp;
	struct hash_desc *desc;
	int err;
	unsigned int nbytes = 0;

	/*
	 * Okay, so RFC2385 is turned on for this connection,
	 * so we need to generate the MD5 hash for the packet now.
	 */

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;

	bp = &hp->md5_blk.ip4;
	desc = &hp->md5_desc;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = protocol;
	bp->len = htons(tcplen);

	sg_init_table(sg, 4);

	sg_set_buf(&sg[block++], bp, sizeof(*bp));
	nbytes += sizeof(*bp);

	/* 2. the TCP header, excluding options, and assuming a
	 * checksum of zero.
	 */
	old_checksum = th->check;
	th->check = 0;
	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
	nbytes += sizeof(struct tcphdr);

	/* 3. the TCP segment data (if any) */
	data_len = tcplen - (th->doff << 2);
	if (data_len > 0) {
		unsigned char *data = (unsigned char *)th + (th->doff << 2);
		sg_set_buf(&sg[block++], data, data_len);
		nbytes += data_len;
	}

	/* 4. an independently-specified key or password, known to both
	 * TCPs and presumably connection-specific
	 */
	sg_set_buf(&sg[block++], key->key, key->keylen);
	nbytes += key->keylen;

	sg_mark_end(&sg[block - 1]);

	/* Now store the hash into the packet */
	err = crypto_hash_init(desc);
	if (err)
		goto clear_hash;
	err = crypto_hash_update(desc, sg, nbytes);
	if (err)
		goto clear_hash;
	err = crypto_hash_final(desc, md5_hash);
	if (err)
		goto clear_hash;

	/* Reset header, and free up the crypto */
	tcp_put_md5sig_pool();
	th->check = old_checksum;

out:
	return 0;
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	goto out;
}
int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
			 struct sock *sk,
			 struct dst_entry *dst,
			 struct request_sock *req,
			 struct tcphdr *th, int protocol,
			 unsigned int tcplen)
{
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else {
		struct rtable *rt = (struct rtable *)dst;
		BUG_ON(!rt);
		saddr = rt->rt_src;
		daddr = rt->rt_dst;
	}
	return tcp_v4_do_calc_md5_hash(md5_hash, key,
				       saddr, daddr,
				       th, protocol, tcplen);
}

EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int length = (th->doff << 2) - sizeof(struct tcphdr);
	int genhash;
	unsigned char *ptr;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);

	/*
	 * If the TCP option length is less than the TCP_MD5SIG
	 * option length, then we can shortcut
	 */
	if (length < TCPOLEN_MD5SIG) {
		if (hash_expected)
			return 1;
		else
			return 0;
	}

	/* Okay, we can't shortcut - we have to grub through the options */
	ptr = (unsigned char *)(th + 1);
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			goto done_opts;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)
				goto done_opts;
			if (opsize > length)
				goto done_opts;

			if (opcode == TCPOPT_MD5SIG) {
				hash_location = ptr;
				goto done_opts;
			}
		}
		ptr += opsize - 2;
		length -= opsize;
	}
done_opts:
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	if (!hash_expected && hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_do_calc_md5_hash(newhash,
					  hash_expected,
					  iph->saddr, iph->daddr,
					  th, sk->sk_protocol,
					  skb->len);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
};
#endif

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer SYNs sent to broadcast or multicast */
	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations; they conserve resources and the peer is
	 * evidently a real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * warm entries in the syn queue, drop the request. It is better than
	 * clogging the syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web servers,
		 * which contain information interesting only for Windows
		 * users) do not send their stamp in SYN. It is an easy case:
		 * we simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
		req->cookie_ts = tmp_opt.tstamp_ok;
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations
			 * proven to be alive.
			 * It means that we continue to communicate
			 * with destinations already remembered
			 * at the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from " NIPQUAD_FMT "/%u\n",
				       NIPQUAD(saddr),
				       ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
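
/*
 * Usage sketch (userspace, not part of the kernel build): the accept-queue
 * and SYN-queue limits tested above derive from the backlog an application
 * passes to listen(). A minimal passive open, with an assumed port:
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	static int serve_one(unsigned short port)
 *	{
 *		struct sockaddr_in sin = { .sin_family = AF_INET };
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		sin.sin_port = htons(port);	// zeroed address means INADDR_ANY
 *		if (fd < 0 ||
 *		    bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
 *		    listen(fd, 128) < 0)	// 128 bounds the accept queue
 *			return -1;
 *		return accept(fd, NULL, NULL);	// completed handshakes pop here
 *	}
 */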
/*
 * The three way handshake has completed - the final ACK has been
 * received and validated - now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
	}
#endif

	__inet_hash_nolisten(newsk);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
/* VJ's idea. Save the last timestamp seen from this destination
 * and hold it at least for the normal timewait interval, to use for
 * duplicate segment detection in subsequent connections, before they
 * enter synchronized state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}
struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_calc_md5_hash,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
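
/*
 * Usage sketch (userspace, not part of the kernel build): the congestion
 * control ops installed above can be read back per socket through the
 * TCP_CONGESTION socket option.
 *
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	static void print_cc(int fd)
 *	{
 *		char name[16];	// TCP_CA_NAME_MAX in the kernel
 *		socklen_t len = sizeof(name);
 *
 *		if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
 *			       name, &len) == 0)
 *			printf("congestion control: %.*s\n", (int)len, name);
 *	}
 */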
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	if (tp->defer_tcp_accept.request) {
		reqsk_free(tp->defer_tcp_accept.request);
		sock_put(tp->defer_tcp_accept.listen_sk);
		sock_put(sk);
		tp->defer_tcp_accept.listen_sk = NULL;
		tp->defer_tcp_accept.request = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family &&
				    net_eq(sock_net(req->sk), net)) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk	  = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;
		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		read_lock_bh(lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	return 0;
}
int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
		len);
}
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
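
/*
 * Usage sketch (userspace, not part of the kernel build): the table rendered
 * by tcp4_seq_show() is read as plain text from /proc/net/tcp.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// hex addresses, one socket per row
 *		fclose(f);
 *		return 0;
 *	}
 */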
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init = tcp_sk_init,
	.exit = tcp_sk_exit,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_device(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_tcp_low_latency);