2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : airthmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
/*
 * ipv6_select_ident - assign a fresh fragment identification to *fhdr.
 *
 * A file-scope 32-bit counter is read and incremented under ip6_id_lock
 * (BH-safe spinlock); the value 0 is skipped so ids cycle 1..2^32-1.
 * The id is stored in network byte order via htonl().
 * NOTE(review): excerpt is elided — braces/closing lines of this function
 * are missing from this view; code lines below are kept verbatim.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
/* Skip 0 so an identification of zero is never handed out. */
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
/*
 * ip6_output_finish - hand a routed skb to the link layer.
 *
 * If the dst has a cached hardware header (hh_cache), copy it in front of
 * the packet under hh_lock and call hh_output(); otherwise fall back to the
 * neighbour's output function.  With neither available the packet cannot be
 * sent and the no-route counter is bumped.
 * NOTE(review): elided excerpt — the hh!=NULL test, local declarations and
 * the final return/drop path are missing from this view.
 */
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
/* Copy the cached (aligned) hardware header ahead of skb->data. */
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
/* No cached header and no neighbour: count as "no route" output drop. */
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
/*
 * Loop a cloned multicast packet back to the local stack: reset the MAC
 * pointer, pull back to the network header, and mark it PACKET_LOOPBACK
 * with checksum already verified.
 * NOTE(review): the tail of this function (netif_rx/return) is elided here.
 */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
/*
 * ip6_output2 - post-routing output for an IPv6 skb.
 *
 * For multicast destinations: if the socket wants loopback (mc_loop) and we
 * are a member of the group on this device, clone the skb and deliver the
 * clone locally through the POST_ROUTING hook; a hop limit of 0 on a
 * multicast packet is discarded.  All packets finally pass through the
 * NF_IP6_POST_ROUTING netfilter hook to ip6_output_finish().
 * NOTE(review): excerpt is elided — several closing braces and the
 * non-multicast fallthrough are missing from this view.
 */
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
/* Loop back a copy only when the sender didn't disable mc_loop and we
 * actually belong to the destination group on this (non-loopback) dev. */
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
/* RFC-mandated: never send a multicast packet with hop limit 0. */
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
/*
 * ip6_output - main IPv6 output entry point (dst->output).
 * Fragments the packet when it exceeds the path MTU or when the route
 * requires fragmentation of all packets (dst_allfrag); otherwise goes
 * straight to ip6_output2().
 */
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
156 #ifdef CONFIG_NETFILTER
/*
 * ip6_route_me_harder - re-route an skb after netfilter mangled its
 * addresses.  Builds a flow from the (possibly rewritten) IPv6 header,
 * looks up a new route, and replaces the old dst.  On lookup error the
 * no-route counter is bumped and a debug message printed.
 * NOTE(review): elided excerpt — the flowi initialiser header, the error
 * test and the dst assignment tail are missing from this view.
 */
157 int ip6_route_me_harder(struct sk_buff *skb)
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
/* Flow key rebuilt from the current header; oif from the bound socket. */
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
170 dst = ip6_route_output(skb->sk, &fl);
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
180 /* Drop old route. */
181 dst_release(skb->dst);
189 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - transmit a fully-built transport segment for a socket.
 *
 * Reserves headroom for extension headers + IPv6 header + link layer
 * (reallocating the skb if needed), pushes fragmentable and
 * non-fragmentable extension headers, fills in the IPv6 header (version,
 * flow label, payload length, next header, hop limit, addresses) and hands
 * the packet to the NF_IP6_LOCAL_OUT hook.  Oversized packets (unless
 * ipfragok) get an ICMPV6_PKT_TOOBIG back to the sender itself.
 * NOTE(review): elided excerpt — the opt==NULL guard, skb2 swap logic,
 * mtu assignment and several braces are missing from this view.
 */
192 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
193 struct ipv6_txoptions *opt, int ipfragok)
195 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
196 struct in6_addr *first_hop = &fl->fl6_dst;
197 struct dst_entry *dst = skb->dst;
199 u8 proto = fl->proto;
200 int seg_len = skb->len;
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
/* Not enough headroom: reallocate; failure counts as an output discard. */
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
219 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
223 skb_set_owner_w(skb, sk);
226 ipv6_push_frag_opts(skb, opt, &proto);
/* nfrag opts may rewrite the first hop (routing header). */
228 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
231 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
234 * Fill in the IPv6 header
/* Version 6 in the top nibble, OR'ed with the 20-bit flow label. */
237 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
/* Hop limit: socket setting, then route metric, then device default. */
240 hlimit = np->hop_limit;
242 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
244 hlimit = ipv6_get_hoplimit(dst->dev);
246 hdr->payload_len = htons(seg_len);
247 hdr->nexthdr = proto;
248 hdr->hop_limit = hlimit;
250 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
251 ipv6_addr_copy(&hdr->daddr, first_hop);
254 if ((skb->len <= mtu) || ipfragok) {
255 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
256 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
/* Too big and fragmentation not allowed: tell ourselves via ICMPv6. */
261 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
263 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
264 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
270 * To avoid extra problems ND packets are send through this
271 * routine. It's code duplication but I really want to avoid
272 * extra checks since ipv6_build_header is used by TCP (which
273 * is for us performance critical)
/*
 * ip6_nd_hdr - build an IPv6 header for a Neighbour Discovery packet.
 * Puts a plain header (no flow label, socket hop limit) with the given
 * saddr/daddr and next-header directly into the skb.
 * NOTE(review): elided excerpt — the `int len, int proto` parameter line,
 * totlen usage and return are missing from this view.
 */
276 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
277 struct in6_addr *saddr, struct in6_addr *daddr,
280 struct ipv6_pinfo *np = inet6_sk(sk);
284 skb->protocol = htons(ETH_P_IPV6);
287 totlen = len + sizeof(struct ipv6hdr);
289 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
/* Version 6, zero traffic class and flow label. */
292 *(u32*)hdr = htonl(0x60000000);
294 hdr->payload_len = htons(len);
295 hdr->nexthdr = proto;
296 hdr->hop_limit = np->hop_limit;
298 ipv6_addr_copy(&hdr->saddr, saddr);
299 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to subscribed sockets.
 *
 * Walks the global ip6_ra_chain under ip6_ra_lock; each socket whose
 * selector matches gets a clone (rawv6_rcv), and the last match consumes
 * the original skb.  Returns whether the packet was delivered.
 * NOTE(review): elided excerpt — the `last` bookkeeping, clone NULL check
 * and return statements are missing from this view.
 */
304 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
306 struct ip6_ra_chain *ra;
307 struct sock *last = NULL;
309 read_lock(&ip6_ra_lock);
310 for (ra = ip6_ra_chain; ra; ra = ra->next) {
311 struct sock *sk = ra->sk;
312 if (sk && ra->sel == sel) {
/* Earlier match pending: give it a clone, keep the original for later. */
314 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
316 rawv6_rcv(last, skb2);
/* Final matching socket receives the original skb. */
323 rawv6_rcv(last, skb);
324 read_unlock(&ip6_ra_lock);
327 read_unlock(&ip6_ra_lock);
/* Final step of forwarding: hand the skb to the route's output function. */
331 static inline int ip6_forward_finish(struct sk_buff *skb)
333 return dst_output(skb);
/*
 * ip6_forward - forward an IPv6 packet to another host.
 *
 * Checks forwarding sysctl, XFRM forward policy, Router Alert option
 * (handing RA packets to ip6_call_ra_chain untouched), hop limit
 * (ICMP time-exceeded on expiry), XFRM re-routing, redirect generation
 * when the packet exits the interface it arrived on, a security check on
 * link-local/multicast/loopback sources, and the outgoing MTU (ICMP
 * packet-too-big).  Surviving packets are COW'ed, their hop limit
 * decremented, and sent through the NF_IP6_FORWARD hook.
 * NOTE(review): elided excerpt — drop labels, several kfree_skb/return
 * paths and the hop-limit decrement line are missing from this view.
 */
336 int ip6_forward(struct sk_buff *skb)
338 struct dst_entry *dst = skb->dst;
339 struct ipv6hdr *hdr = skb->nh.ipv6h;
340 struct inet6_skb_parm *opt = IP6CB(skb);
342 if (ipv6_devconf.forwarding == 0)
345 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
346 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
350 skb->ip_summed = CHECKSUM_NONE;
353 * We DO NOT make any processing on
354 * RA packets, pushing them to user level AS IS
355 * without ane WARRANTY that application will be able
356 * to interpret them. The reason is that we
357 * cannot make anything clever here.
359 * We are not end-node, so that if packet contains
360 * AH/ESP, we cannot make anything.
361 * Defragmentation also would be mistake, RA packets
362 * cannot be fragmented, because there is no warranty
363 * that different fragments will go along one path. --ANK
/* opt->ra is the offset of the Router Alert option; bytes 2-3 carry the
 * 16-bit RA value used as the chain selector. */
366 u8 *ptr = skb->nh.raw + opt->ra;
367 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
372 * check and decrement ttl
374 if (hdr->hop_limit <= 1) {
375 /* Force OUTPUT device used as source address */
377 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
384 if (!xfrm6_route_forward(skb)) {
385 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
390 /* IPv6 specs say nothing about it, but it is clear that we cannot
391 send redirects to source routed frames.
/* Packet leaves on the interface it arrived on: candidate for redirect. */
393 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
394 struct in6_addr *target = NULL;
396 struct neighbour *n = dst->neighbour;
399 * incoming and outgoing devices are the same
403 rt = (struct rt6_info *) dst;
404 if ((rt->rt6i_flags & RTF_GATEWAY))
405 target = (struct in6_addr*)&n->primary_key;
407 target = &hdr->daddr;
409 /* Limit redirects both by destination (here)
410 and by source (inside ndisc_send_redirect)
412 if (xrlim_allow(dst, 1*HZ))
413 ndisc_send_redirect(skb, n, target);
414 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
415 |IPV6_ADDR_LINKLOCAL)) {
416 /* This check is security critical. */
420 if (skb->len > dst_mtu(dst)) {
421 /* Again, force OUTPUT device used as source address */
423 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
424 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
425 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
/* Ensure a private, writable copy with link-layer headroom before we
 * touch the hop limit. */
430 if (skb_cow(skb, dst->dev->hard_header_len)) {
431 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
437 /* Mangling hops number delayed to point after skb COW */
441 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
442 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
445 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - copy per-packet metadata from one skb to another,
 * used when building fragments: packet type, priority, protocol, dst
 * reference, traffic-control index and netfilter/conntrack/bridge state.
 * NOTE(review): elided excerpt — #endif lines and some fields (e.g. dev,
 * timestamps) are missing from this view.
 */
451 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
453 to->pkt_type = from->pkt_type;
454 to->priority = from->priority;
455 to->protocol = from->protocol;
/* Replace 'to's dst with a new reference on 'from's dst. */
456 dst_release(to->dst);
457 to->dst = dst_clone(from->dst);
460 #ifdef CONFIG_NET_SCHED
461 to->tc_index = from->tc_index;
463 #ifdef CONFIG_NETFILTER
464 to->nfmark = from->nfmark;
465 /* Connection association is same as pre-frag packet */
466 to->nfct = from->nfct;
467 nf_conntrack_get(to->nfct);
468 to->nfctinfo = from->nfctinfo;
469 #ifdef CONFIG_BRIDGE_NETFILTER
470 nf_bridge_put(to->nf_bridge);
471 to->nf_bridge = from->nf_bridge;
472 nf_bridge_get(to->nf_bridge);
/*
 * ip6_find_1stfragopt - find where the fragment header must be inserted.
 *
 * Walks the extension-header chain from the fixed IPv6 header, returning
 * the byte offset of the first header that belongs to the fragmentable
 * part, and leaving *nexthdr pointing at the nexthdr field to patch.
 * A destination-options header after a routing header terminates the walk.
 * NOTE(review): elided excerpt — the switch statement head, found_rhdr
 * declaration and the fallback return are missing from this view.
 */
477 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
479 u16 offset = sizeof(struct ipv6hdr);
480 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
481 unsigned int packet_len = skb->tail - skb->nh.raw;
483 *nexthdr = &skb->nh.ipv6h->nexthdr;
485 while (offset + 1 <= packet_len) {
490 case NEXTHDR_ROUTING:
492 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
/* dst-opts following a routing header start the fragmentable part. */
493 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
494 offset += ipv6_optlen(exthdr);
495 *nexthdr = &exthdr->nexthdr;
496 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
/*
 * ip6_fragment - split an oversized IPv6 packet and emit each fragment
 * through the given output callback.
 *
 * Two strategies:
 *  1. Fast path: if the skb already has a frag_list with correctly-sized,
 *     unshared fragments, insert a fragment header into each in place and
 *     send them without copying.
 *  2. Slow path: allocate a fresh skb per fragment, copy the unfragmentable
 *     header part plus a block of payload into each.
 * All fragments of one packet share the identification chosen by
 * ipv6_select_ident(); IP6_MF is set on every fragment but the last.
 * NOTE(review): heavily elided excerpt — loop heads, error labels,
 * output() invocations and multiple braces are missing from this view.
 */
506 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
508 struct net_device *dev;
509 struct sk_buff *frag;
510 struct rt6_info *rt = (struct rt6_info*)skb->dst;
511 struct ipv6hdr *tmp_hdr;
513 unsigned int mtu, hlen, left, len;
515 int ptr, offset = 0, err=0;
516 u8 *prevhdr, nexthdr = 0;
/* hlen = length of the unfragmentable part (fixed header + pre-fragment
 * extension headers). */
519 hlen = ip6_find_1stfragopt(skb, &prevhdr);
/* Payload space per fragment after the fragment header is added. */
522 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
524 if (skb_shinfo(skb)->frag_list) {
525 int first_len = skb_pagelen(skb);
/* Fast path only if every piece has correct geometry and headroom. */
527 if (first_len - hlen > mtu ||
528 ((first_len - hlen) & 7) ||
532 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
533 /* Correct geometry. */
534 if (frag->len > mtu ||
535 ((frag->len & 7) && frag->next) ||
536 skb_headroom(frag) < hlen)
539 /* Partially cloned skb? */
540 if (skb_shared(frag))
/* Transfer write-memory accounting from the head skb to the fragment. */
547 frag->destructor = sock_wfree;
548 skb->truesize -= frag->truesize;
554 frag = skb_shinfo(skb)->frag_list;
555 skb_shinfo(skb)->frag_list = NULL;
/* Stash a copy of the unfragmentable part to replicate into fragments. */
558 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
560 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
564 *prevhdr = NEXTHDR_FRAGMENT;
565 memcpy(tmp_hdr, skb->nh.raw, hlen);
566 __skb_pull(skb, hlen);
567 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
568 skb->nh.raw = __skb_push(skb, hlen);
569 memcpy(skb->nh.raw, tmp_hdr, hlen);
571 ipv6_select_ident(skb, fh);
572 fh->nexthdr = nexthdr;
/* First fragment: offset 0, more-fragments set. */
574 fh->frag_off = htons(IP6_MF);
575 frag_id = fh->identification;
577 first_len = skb_pagelen(skb);
578 skb->data_len = first_len - skb_headlen(skb);
579 skb->len = first_len;
580 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
584 /* Prepare header of the next frame,
585 * before previous one went down. */
587 frag->ip_summed = CHECKSUM_NONE;
588 frag->h.raw = frag->data;
589 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
590 frag->nh.raw = __skb_push(frag, hlen);
591 memcpy(frag->nh.raw, tmp_hdr, hlen);
592 offset += skb->len - hlen - sizeof(struct frag_hdr);
593 fh->nexthdr = nexthdr;
595 fh->frag_off = htons(offset);
596 if (frag->next != NULL)
597 fh->frag_off |= htons(IP6_MF);
598 fh->identification = frag_id;
599 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
600 ip6_copy_metadata(frag, skb);
616 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
626 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/* ---- slow path: copy payload into freshly allocated fragments ---- */
631 left = skb->len - hlen; /* Space per frame */
632 ptr = hlen; /* Where to start from */
635 * Fragment the datagram.
638 *prevhdr = NEXTHDR_FRAGMENT;
641 * Keep copying data until we run out.
645 /* IF: it doesn't fit, use 'mtu' - the data space left */
648 /* IF: we are not sending upto and including the packet end
649 then align the next start on an eight byte boundary */
657 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
658 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
659 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
665 * Set up data on packet
668 ip6_copy_metadata(frag, skb);
669 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
670 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
671 frag->nh.raw = frag->data;
672 fh = (struct frag_hdr*)(frag->data + hlen);
673 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
676 * Charge the memory for the fragment to any owner
680 skb_set_owner_w(frag, skb->sk);
683 * Copy the packet header into the new buffer.
685 memcpy(frag->nh.raw, skb->data, hlen);
688 * Build fragment header.
690 fh->nexthdr = nexthdr;
/* First fragment picks the id; the rest reuse it. */
693 ipv6_select_ident(skb, fh);
694 frag_id = fh->identification;
696 fh->identification = frag_id;
699 * Copy a block of the IP datagram.
701 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
705 fh->frag_off = htons(offset);
707 fh->frag_off |= htons(IP6_MF);
708 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
714 * Put this fragment into the sending queue.
717 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
724 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
729 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/*
 * ip6_dst_lookup - resolve a dst entry for the given flow, preferring the
 * socket's cached route.
 *
 * A cached dst is revalidated: a host route must still match the flow's
 * destination (or the socket's daddr_cache) and the oif must agree,
 * otherwise the cache is abandoned and a fresh ip6_route_output() lookup
 * is done.  If the flow lacks a source address, one is selected from the
 * route via ipv6_get_saddr().  Errors release the dst.
 * NOTE(review): elided excerpt — NULL checks, ip6_dst_check, cache-drop
 * lines and the out_err_release label are missing from this view.
 */
733 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
739 struct ipv6_pinfo *np = inet6_sk(sk);
741 *dst = sk_dst_check(sk, np->dst_cookie);
743 struct rt6_info *rt = (struct rt6_info*)*dst;
745 /* Yes, checking route validity in not connected
746 case is not very simple. Take into account,
747 that we do not support routing by source, TOS,
748 and MSG_DONTROUTE --ANK (980726)
750 1. If route was host route, check that
751 cached destination is current.
752 If it is network route, we still may
753 check its validity using saved pointer
754 to the last used address: daddr_cache.
755 We do not want to save whole address now,
756 (because main consumer of this service
757 is tcp, which has not this problem),
758 so that the last trick works only on connected
760 2. oif also should be the same.
/* Reject the cached route when destination or oif no longer match. */
763 if (((rt->rt6i_dst.plen != 128 ||
764 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
765 && (np->daddr_cache == NULL ||
766 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
767 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
775 *dst = ip6_route_output(sk, fl);
777 if ((err = (*dst)->error))
778 goto out_err_release;
/* Unspecified source: pick one appropriate for the chosen route. */
780 if (ipv6_addr_any(&fl->fl6_src)) {
781 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
784 goto out_err_release;
/*
 * ip6_append_data - append user data to the socket's pending-output queue
 * (corked transmission), fragment-aware.
 *
 * First call for a cork cycle stores options/route/hop-limit/mtu in the
 * cork state; subsequent calls reuse them.  Data is pulled in via the
 * caller-supplied getfrag() callback, either into skb linear space or into
 * page fragments when the device supports scatter/gather (NETIF_F_SG).
 * New skbs are sized against maxfraglen so ip6_push_pending_frames()/
 * ip6_fragment() can later emit valid fragments; trailing data may be
 * moved between skbs (fraggap) to keep fragment payloads 8-byte aligned.
 * On error the queued length is rolled back and OUTDISCARDS is bumped.
 * NOTE(review): very heavily elided excerpt — parameter tail, many guards,
 * loop heads, labels and braces are missing from this view; code lines are
 * kept verbatim.
 */
795 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
796 void *from, int length, int transhdrlen,
797 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
800 struct inet_sock *inet = inet_sk(sk);
801 struct ipv6_pinfo *np = inet6_sk(sk);
803 unsigned int maxfraglen, fragheaderlen;
810 int csummode = CHECKSUM_NONE;
/* Empty write queue => start of a cork cycle: capture cork state. */
814 if (skb_queue_empty(&sk->sk_write_queue)) {
819 if (np->cork.opt == NULL) {
820 np->cork.opt = kmalloc(opt->tot_len,
822 if (unlikely(np->cork.opt == NULL))
824 } else if (np->cork.opt->tot_len < opt->tot_len) {
825 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
828 memcpy(np->cork.opt, opt, opt->tot_len);
829 inet->cork.flags |= IPCORK_OPT;
830 /* need source address above miyazawa*/
832 dst_hold(&rt->u.dst);
835 np->cork.hop_limit = hlimit;
836 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
837 if (dst_allfrag(rt->u.dst.path))
838 inet->cork.flags |= IPCORK_ALLFRAG;
839 inet->cork.length = 0;
840 sk->sk_sndmsg_page = NULL;
841 sk->sk_sndmsg_off = 0;
842 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
844 transhdrlen += exthdrlen;
/* Subsequent call in the same cork cycle: reuse stored state. */
848 if (inet->cork.flags & IPCORK_OPT)
852 mtu = inet->cork.fragsize;
855 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
857 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
/* Largest on-wire fragment size, 8-byte aligned payload. */
858 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
860 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
861 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
862 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
868 * Let's try using as much space as possible.
869 * Use MTU if total length of the message fits into the MTU.
870 * Otherwise, we need to reserve fragment header and
871 * fragment alignment (= 8-15 octects, in total).
873 * Note that we may need to "move" the data from the tail of
874 * of the buffer to the new fragment when we split
877 * FIXME: It may be fragmented into multiple chunks
878 * at once if non-fragmentable extension headers
883 inet->cork.length += length;
885 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
889 /* Check if the remaining data fits into current packet. */
890 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
892 copy = maxfraglen - skb->len;
/* Need a new skb: compute how much goes into it and allocate. */
896 unsigned int datalen;
897 unsigned int fraglen;
898 unsigned int fraggap;
899 unsigned int alloclen;
900 struct sk_buff *skb_prev;
904 /* There's no room in the current skb */
/* fraggap = bytes beyond the fragment boundary in the previous skb
 * that must migrate into the new one for 8-byte alignment. */
906 fraggap = skb_prev->len - maxfraglen;
911 * If remaining data exceeds the mtu,
912 * we know we need more fragment(s).
914 datalen = length + fraggap;
915 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
916 datalen = maxfraglen - fragheaderlen;
918 fraglen = datalen + fragheaderlen;
919 if ((flags & MSG_MORE) &&
920 !(rt->u.dst.dev->features&NETIF_F_SG))
923 alloclen = datalen + fragheaderlen;
926 * The last fragment gets additional space at tail.
927 * Note: we overallocate on fragments with MSG_MODE
928 * because we have no idea if we're the last one.
930 if (datalen == length + fraggap)
931 alloclen += rt->u.dst.trailer_len;
934 * We just reserve space for fragment header.
935 * Note: this may be overallocation if the message
936 * (without MSG_MORE) fits into the MTU.
938 alloclen += sizeof(struct frag_hdr);
941 skb = sock_alloc_send_skb(sk,
943 (flags & MSG_DONTWAIT), &err);
946 if (atomic_read(&sk->sk_wmem_alloc) <=
948 skb = sock_wmalloc(sk,
949 alloclen + hh_len, 1,
951 if (unlikely(skb == NULL))
957 * Fill in the control structures
959 skb->ip_summed = csummode;
961 /* reserve for fragmentation */
962 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
965 * Find where to start putting bytes
967 data = skb_put(skb, fraglen);
968 skb->nh.raw = data + exthdrlen;
969 data += fragheaderlen;
970 skb->h.raw = data + exthdrlen;
/* Move the fraggap bytes from the tail of the previous skb, keeping
 * its checksum consistent. */
973 skb->csum = skb_copy_and_csum_bits(
974 skb_prev, maxfraglen,
975 data + transhdrlen, fraggap, 0);
976 skb_prev->csum = csum_sub(skb_prev->csum,
979 skb_trim(skb_prev, maxfraglen);
981 copy = datalen - transhdrlen - fraggap;
986 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
993 length -= datalen - fraggap;
/* Only the first skb of the message can use hardware checksumming. */
996 csummode = CHECKSUM_NONE;
999 * Put the packet on the pending queue
1001 __skb_queue_tail(&sk->sk_write_queue, skb);
/* Device without scatter/gather: copy into linear skb space. */
1008 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1012 if (getfrag(from, skb_put(skb, copy),
1013 offset, copy, off, skb) < 0) {
1014 __skb_trim(skb, off);
/* Scatter/gather: append into (possibly shared) page fragments. */
1019 int i = skb_shinfo(skb)->nr_frags;
1020 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1021 struct page *page = sk->sk_sndmsg_page;
1022 int off = sk->sk_sndmsg_off;
1025 if (page && (left = PAGE_SIZE - off) > 0) {
1028 if (page != frag->page) {
1029 if (i == MAX_SKB_FRAGS) {
1034 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1035 frag = &skb_shinfo(skb)->frags[i];
1037 } else if(i < MAX_SKB_FRAGS) {
1038 if (copy > PAGE_SIZE)
1040 page = alloc_pages(sk->sk_allocation, 0);
1045 sk->sk_sndmsg_page = page;
1046 sk->sk_sndmsg_off = 0;
1048 skb_fill_page_desc(skb, i, page, 0, 0);
1049 frag = &skb_shinfo(skb)->frags[i];
1050 skb->truesize += PAGE_SIZE;
1051 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1056 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1060 sk->sk_sndmsg_off += copy;
1063 skb->data_len += copy;
/* Error path: undo the optimistic length accounting. */
1070 inet->cork.length -= length;
1071 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - coalesce the socket's write queue into one
 * skb (head + frag_list), prepend extension headers and the IPv6 header,
 * and send it through NF_IP6_LOCAL_OUT / dst_output.
 *
 * Queued skbs after the first are chained onto the head's frag_list with
 * their headers pulled off; their write-memory ownership is transferred to
 * the head.  payload_len is 0 (jumbo case) when the total exceeds
 * IPV6_MAXPLEN.  Afterwards the cork state (options, route, flow) is torn
 * down.
 * NOTE(review): elided excerpt — error/out labels and some braces are
 * missing from this view.
 */
1075 int ip6_push_pending_frames(struct sock *sk)
1077 struct sk_buff *skb, *tmp_skb;
1078 struct sk_buff **tail_skb;
1079 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1080 struct inet_sock *inet = inet_sk(sk);
1081 struct ipv6_pinfo *np = inet6_sk(sk);
1082 struct ipv6hdr *hdr;
1083 struct ipv6_txoptions *opt = np->cork.opt;
1084 struct rt6_info *rt = np->cork.rt;
1085 struct flowi *fl = &inet->cork.fl;
1086 unsigned char proto = fl->proto;
1089 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1091 tail_skb = &(skb_shinfo(skb)->frag_list);
1093 /* move skb->data to ip header from ext header */
1094 if (skb->data < skb->nh.raw)
1095 __skb_pull(skb, skb->nh.raw - skb->data);
/* Chain remaining queued skbs onto the head's frag_list, transferring
 * length/truesize accounting and socket references. */
1096 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1097 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1098 *tail_skb = tmp_skb;
1099 tail_skb = &(tmp_skb->next);
1100 skb->len += tmp_skb->len;
1101 skb->data_len += tmp_skb->len;
1102 skb->truesize += tmp_skb->truesize;
1103 __sock_put(tmp_skb->sk);
1104 tmp_skb->destructor = NULL;
1108 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1109 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1110 if (opt && opt->opt_flen)
1111 ipv6_push_frag_opts(skb, opt, &proto);
1112 if (opt && opt->opt_nflen)
1113 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1115 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
/* Version 6 plus the cork flow label. */
1117 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1119 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1120 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1122 hdr->payload_len = 0;
1123 hdr->hop_limit = np->cork.hop_limit;
1124 hdr->nexthdr = proto;
1125 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1126 ipv6_addr_copy(&hdr->daddr, final_dst);
1128 skb->dst = dst_clone(&rt->u.dst);
1129 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1130 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1133 err = np->recverr ? net_xmit_errno(err) : 0;
/* Tear down cork state: options, held route, flow key. */
1139 inet->cork.flags &= ~IPCORK_OPT;
1141 kfree(np->cork.opt);
1142 np->cork.opt = NULL;
1145 dst_release(&np->cork.rt->u.dst);
1147 inet->cork.flags &= ~IPCORK_ALLFRAG;
1149 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_flush_pending_frames - abort a cork cycle: discard every queued skb
 * (counting each as an output discard) and release the cork state
 * (options, held route, flow key), mirroring the cleanup in
 * ip6_push_pending_frames().
 * NOTE(review): elided excerpt — kfree_skb and NULL guards are missing
 * from this view.
 */
1155 void ip6_flush_pending_frames(struct sock *sk)
1157 struct inet_sock *inet = inet_sk(sk);
1158 struct ipv6_pinfo *np = inet6_sk(sk);
1159 struct sk_buff *skb;
1161 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1162 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1166 inet->cork.flags &= ~IPCORK_OPT;
1169 kfree(np->cork.opt);
1170 np->cork.opt = NULL;
1173 dst_release(&np->cork.rt->u.dst);
1175 inet->cork.flags &= ~IPCORK_ALLFRAG;
1177 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));