/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
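/* Walk the owners of a bind bucket and report whether binding sk to the
 * same port would conflict: a clash requires a compatible bound device,
 * no usable SO_REUSEADDR pairing, and overlapping receive addresses.
 */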
int inet6_csk_bind_conflict(const struct sock *sk,
			    const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
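/* Local port allocation is delegated to the generic connection-socket
 * helper, plugging in the IPv6-aware conflict check above.
 */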
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}
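/* Insert an as-yet-unhashed socket into the listening or the established
 * hash table, taking the lock that protects the chosen chain.
 */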
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		unsigned int hash;
		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
		hash &= (tcp_hashinfo.ehash_size - 1);
		list = &tcp_hashinfo.ehash[hash].chain;
		lock = &tcp_hashinfo.ehash[hash].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}

static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}
/*
 *	Open request hash tables.
 */

static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
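/* Look up a pending connection request (SYN_RECV) on a listening socket
 * by remote address/port, local address and, for link-local peers, the
 * bound interface.  *prevp is set so the caller can unlink the entry.
 */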
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
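/* Choose the initial sequence number for an outgoing SYN/SYN-ACK, keyed
 * on the connection 4-tuple; IPv4-mapped packets fall back to the IPv4
 * generator.
 */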
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
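/* Verify that the chosen local port does not clash with an existing
 * connection on the same 4-tuple.  A TIME-WAIT entry may be recycled
 * when its timestamps make sequence-number reuse safe (see the IPv4
 * counterpart in tcp_ipv4.c).
 */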
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport))	== ports	&&
		    sk2->sk_family		== PF_INET6	&&
		    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
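/* Bind the socket to a local port for an outgoing connection.  With an
 * ephemeral port (snum == 0) the bind hash is probed for a usable port;
 * with an explicit port only the established-hash check is needed.
 */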
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
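/* Active open: resolve the destination (including IPv4-mapped addresses,
 * flow labels and link-local scope ids), pick a route and source address,
 * bind a local port and send the SYN.
 */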
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
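/* ICMPv6 error handler.  Locates the socket the error refers to and
 * either adjusts the path MTU (PKT_TOOBIG), drops a matching connection
 * request, or reports the error to the user.
 */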
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
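/* Build and transmit a SYN-ACK for a queued connection request,
 * honouring any reversed source route taken from the SYN.
 */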
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
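/* Decide whether the extension headers carried by a received skb are of
 * interest to this socket (per its IPV6_PKTOPTIONS-style rxopt flags),
 * i.e. whether the skb is worth keeping around.
 */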
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
			return 1;
	}
	return 0;
}
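/* Compute the TCP checksum on transmit; with hardware checksumming only
 * the pseudo-header sum is filled in and the device does the rest.
 */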
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
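/* Send a RST in response to a bad segment.  The reply is built from the
 * offending skb alone; no socket state is consulted.
 */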
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}
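/* Send a bare ACK (used for TIME-WAIT and SYN_RECV replies), optionally
 * carrying a timestamp option.
 */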
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;		/* aligned timestamp option */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
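/* For a packet arriving on a listening socket, check the SYN queue and
 * the established/TIME-WAIT hashes for a matching child before falling
 * back to the listener itself.
 */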
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
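/* Queue a freshly created connection request on the listener's SYN
 * table and start its SYN-ACK retransmit timer.
 */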
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
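/* Create the child socket once the three-way handshake completes.
 * Handles both the IPv4-mapped fast path and the native IPv6 case.
 */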
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
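/* Validate the checksum of an incoming segment, trusting hardware
 * verification where available and deferring full software checksums
 * for larger packets until the data is copied.
 */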
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	                                       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
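/* Main receive routine, called from the IPv6 stack for every TCP
 * segment: validates the header, looks up the owning socket and
 * dispatches to the fast path, the backlog or TIME-WAIT handling.
 */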
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
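/* Re-validate (and if necessary re-create) the cached route before
 * retransmitting, e.g. after the old dst entry became obsolete.
 */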
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
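/* Transmit one TCP segment: make sure a valid route is cached, then
 * hand the skb to the IPv6 output path with the socket's options.
 */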
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
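/* Fill in a sockaddr_in6 describing the peer, including the scope id
 * for link-local destinations.
 */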
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tcp6tw->tw_v6_daddr;
	src  = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
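/* Boot-time registration: hook TCP into the IPv6 protocol and
 * socket-switch tables.
 */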
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}