2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/errno.h>
32 #include <linux/kernel.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
73 int __ip6_local_out(struct sk_buff *skb)
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
80 ipv6_hdr(skb)->payload_len = htons(len);
82 return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
/*
 * ip6_local_out - send a locally generated IPv6 packet.
 *
 * Runs the LOCAL_OUT hook via __ip6_local_out(); a verdict of 1 means the
 * hook accepted the packet synchronously, so hand it to dst_output() here.
 * Any other value (queued/stolen/error) is returned unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
98 static int ip6_output_finish(struct sk_buff *skb)
100 struct dst_entry *dst = skb->dst;
103 return neigh_hh_output(dst->hh, skb);
104 else if (dst->neighbour)
105 return dst->neighbour->output(skb);
107 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
116 skb_reset_mac_header(newskb);
117 __skb_pull(newskb, skb_network_offset(newskb));
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 BUG_TRAP(newskb->dst);
127 static int ip6_output2(struct sk_buff *skb)
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
132 skb->protocol = htons(ETH_P_IPV6);
135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr)) {
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
148 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
150 ip6_dev_loopback_xmit);
152 if (ipv6_hdr(skb)->hop_limit == 0) {
153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
162 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
165 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
170 skb->dst->dev->mtu : dst_mtu(skb->dst);
173 int ip6_output(struct sk_buff *skb)
175 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
176 dst_allfrag(skb->dst))
177 return ip6_fragment(skb, ip6_output2);
179 return ip6_output2(skb);
183 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - push extension headers and the fixed IPv6 header onto @skb,
 * then hand it to the NF_IP6_LOCAL_OUT hook.  If the packet exceeds the
 * path MTU and neither @ipfragok nor GSO applies, ICMPV6_PKT_TOOBIG is
 * sent back to the local sender and the skb is dropped.
 * NOTE(review): several original lines are missing from this extract
 * (declarations, error paths, hlimit/tclass defaults, mtu computation);
 * the comments below describe only what is visible here.
 */
186 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
187 struct ipv6_txoptions *opt, int ipfragok)
189 struct ipv6_pinfo *np = inet6_sk(sk);
190 struct in6_addr *first_hop = &fl->fl6_dst;
191 struct dst_entry *dst = skb->dst;
193 u8 proto = fl->proto;
194 int seg_len = skb->len;
199 unsigned int head_room;
201 /* First: exthdrs may take lots of space (~8K for now)
202 MAX_HEADER is not enough.
/* Make sure there is room for ext headers + IPv6 header + link layer. */
204 head_room = opt->opt_nflen + opt->opt_flen;
205 seg_len += head_room;
206 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208 if (skb_headroom(skb) < head_room) {
209 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
211 IP6_INC_STATS(ip6_dst_idev(skb->dst),
212 IPSTATS_MIB_OUTDISCARDS);
/* Reallocated copy must be owned by the sending socket for accounting. */
219 skb_set_owner_w(skb, sk);
/* Fragmentable options go first (inner), non-fragmentable may rewrite
 * the first hop (e.g. routing header). */
222 ipv6_push_frag_opts(skb, opt, &proto);
224 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
227 skb_push(skb, sizeof(struct ipv6hdr));
228 skb_reset_network_header(skb);
232 * Fill in the IPv6 header
/* Hop limit: socket setting, then route metric, then device default. */
237 hlimit = np->hop_limit;
239 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
241 hlimit = ipv6_get_hoplimit(dst->dev);
/* Version (6), traffic class and flow label packed into the first word. */
249 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
255 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
256 ipv6_addr_copy(&hdr->daddr, first_hop);
258 skb->priority = sk->sk_priority;
261 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
262 IP6_INC_STATS(ip6_dst_idev(skb->dst),
263 IPSTATS_MIB_OUTREQUESTS);
264 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
/* Too big and not allowed to fragment: tell the local sender. */
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
272 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
277 EXPORT_SYMBOL(ip6_xmit);
280 * To avoid extra problems ND packets are sent through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
/*
 * ip6_nd_hdr - build a minimal IPv6 header for neighbour discovery packets.
 * No extension headers, no flow label, hop limit taken from the socket.
 * NOTE(review): some original lines (declarations, headroom reservation,
 * return) are missing from this extract.
 */
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
290 struct ipv6_pinfo *np = inet6_sk(sk);
294 skb->protocol = htons(ETH_P_IPV6);
297 totlen = len + sizeof(struct ipv6hdr);
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
/* Version 6, zero traffic class, zero flow label. */
303 *(__be32*)hdr = htonl(0x60000000);
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to every raw socket
 * registered for this RA value (@sel), honouring sk_bound_dev_if.
 * Each matching socket except the last gets a clone; the last one gets
 * the original skb.  Returns nonzero if the packet was consumed.
 * NOTE(review): lines delivering skb2/advancing 'last' and the returns
 * are missing from this extract.
 */
315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
329 rawv6_rcv(last, skb2);
/* Last matching socket consumes the original skb. */
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
340 read_unlock(&ip6_ra_lock);
/*
 * ip6_forward_proxy_check - decide how to treat a packet addressed to a
 * proxied neighbour: ND messages are handed to the local stack, traffic
 * to link-local destinations is rejected, the rest may be forwarded.
 * NOTE(review): the return statements and part of the switch body are
 * missing from this extract; return-value semantics are inferred from the
 * caller (ip6_forward): >0 local input, 0 forward, <0 drop.
 */
344 static int ip6_forward_proxy_check(struct sk_buff *skb)
346 struct ipv6hdr *hdr = ipv6_hdr(skb);
347 u8 nexthdr = hdr->nexthdr;
/* Skip any extension headers to find the transport protocol. */
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
355 offset = sizeof(struct ipv6hdr);
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
/* Need at least the ICMPv6 type byte in the linear area. */
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
/* Continuation for the NF_IP6_FORWARD hook: transmit via the route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
/*
 * ip6_forward - forward a received IPv6 packet towards its destination.
 * Performs policy checks, RA interception, hop-limit handling, proxy-ND
 * local delivery, redirect generation, MTU check and COW before passing
 * the packet through the NF_IP6_FORWARD hook to ip6_forward_finish().
 * NOTE(review): many original lines (gotos, kfree_skb calls, closing
 * braces, hop-limit decrement) are missing from this extract.
 */
400 int ip6_forward(struct sk_buff *skb)
402 struct dst_entry *dst = skb->dst;
403 struct ipv6hdr *hdr = ipv6_hdr(skb);
404 struct inet6_skb_parm *opt = IP6CB(skb);
406 if (ipv6_devconf.forwarding == 0)
409 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
410 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
414 skb_forward_csum(skb);
417 * We DO NOT make any processing on
418 * RA packets, pushing them to user level AS IS
419 * without any WARRANTY that application will be able
420 * to interpret them. The reason is that we
421 * cannot make anything clever here.
423 * We are not end-node, so that if packet contains
424 * AH/ESP, we cannot make anything.
425 * Defragmentation also would be mistake, RA packets
426 * cannot be fragmented, because there is no warranty
427 * that different fragments will go along one path. --ANK
430 u8 *ptr = skb_network_header(skb) + opt->ra;
431 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 * check and decrement ttl
438 if (hdr->hop_limit <= 1) {
439 /* Force OUTPUT device used as source address */
441 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
443 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
449 /* XXX: idev->cnf.proxy_ndp? */
450 if (ipv6_devconf.proxy_ndp &&
451 pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
452 int proxied = ip6_forward_proxy_check(skb);
454 return ip6_input(skb);
455 else if (proxied < 0) {
456 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
461 if (!xfrm6_route_forward(skb)) {
462 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
467 /* IPv6 specs say nothing about it, but it is clear that we cannot
468 send redirects to source routed frames.
469 We don't send redirects to frames decapsulated from IPsec.
471 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
473 struct in6_addr *target = NULL;
475 struct neighbour *n = dst->neighbour;
478 * incoming and outgoing devices are the same
/* Redirect to the gateway when the route has one, otherwise to the
 * final destination itself. */
482 rt = (struct rt6_info *) dst;
483 if ((rt->rt6i_flags & RTF_GATEWAY))
484 target = (struct in6_addr*)&n->primary_key;
486 target = &hdr->daddr;
488 /* Limit redirects both by destination (here)
489 and by source (inside ndisc_send_redirect)
491 if (xrlim_allow(dst, 1*HZ))
492 ndisc_send_redirect(skb, n, target);
494 int addrtype = ipv6_addr_type(&hdr->saddr);
496 /* This check is security critical. */
497 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
499 if (addrtype & IPV6_ADDR_LINKLOCAL) {
500 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
501 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
/* Forwarded packets are never fragmented here: signal PMTU instead. */
506 if (skb->len > dst_mtu(dst)) {
507 /* Again, force OUTPUT device used as source address */
509 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
510 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
516 if (skb_cow(skb, dst->dev->hard_header_len)) {
517 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
523 /* Mangling hops number delayed to point after skb COW */
527 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
528 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
531 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - copy per-packet metadata from @from to @to when
 * producing fragments: packet type, priority, protocol, route, mark,
 * traffic-control index, netfilter trace flag and security mark.
 */
537 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
539 to->pkt_type = from->pkt_type;
540 to->priority = from->priority;
541 to->protocol = from->protocol;
/* Drop any route already attached to @to before taking a reference
 * to @from's route. */
542 dst_release(to->dst);
543 to->dst = dst_clone(from->dst);
545 to->mark = from->mark;
547 #ifdef CONFIG_NET_SCHED
548 to->tc_index = from->tc_index;
551 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
552 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
553 to->nf_trace = from->nf_trace;
555 skb_copy_secmark(to, from);
/*
 * ip6_find_1stfragopt - locate the point where the fragment header must be
 * inserted: after any per-fragment extension headers (hop-by-hop, routing,
 * destination options before a routing header).  On return, *nexthdr points
 * at the nexthdr byte that should be overwritten with NEXTHDR_FRAGMENT.
 * Returns the offset (header length to copy into each fragment).
 * NOTE(review): the switch body and loop exit are partly missing from
 * this extract.
 */
558 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
560 u16 offset = sizeof(struct ipv6hdr);
561 struct ipv6_opt_hdr *exthdr =
562 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
563 unsigned int packet_len = skb->tail - skb->network_header;
565 *nexthdr = &ipv6_hdr(skb)->nexthdr;
567 while (offset + 1 <= packet_len) {
573 case NEXTHDR_ROUTING:
577 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/* A Home Address option forces fragmentation after this header. */
578 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
588 offset += ipv6_optlen(exthdr);
589 *nexthdr = &exthdr->nexthdr;
590 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
596 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
/*
 * ip6_fragment - split @skb into MTU-sized fragments and send each via
 * @output.  Two paths: a fast path that reuses an existing frag_list when
 * its geometry already matches, and a slow path that allocates fresh skbs
 * and copies data.  Per-fragment headers (up to the insertion point found
 * by ip6_find_1stfragopt) are replicated into every fragment.
 * NOTE(review): numerous original lines (declarations, gotos, loop
 * structure, kfree_skb/err handling) are missing from this extract.
 */
598 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
600 struct net_device *dev;
601 struct sk_buff *frag;
602 struct rt6_info *rt = (struct rt6_info*)skb->dst;
603 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
604 struct ipv6hdr *tmp_hdr;
606 unsigned int mtu, hlen, left, len;
608 int ptr, offset = 0, err=0;
609 u8 *prevhdr, nexthdr = 0;
612 hlen = ip6_find_1stfragopt(skb, &prevhdr);
615 mtu = ip6_skb_dst_mtu(skb);
617 /* We must not fragment if the socket is set to force MTU discovery
618 * or if the skb is not generated by a local socket. (This last
619 * check should be redundant, but it's free.)
621 if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
622 skb->dev = skb->dst->dev;
623 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
624 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
629 if (np && np->frag_size < mtu) {
/* Budget available for payload once unfragmentable headers and the
 * fragment header itself are accounted for. */
633 mtu -= hlen + sizeof(struct frag_hdr);
/* Fast path: the skb already carries a frag_list with usable geometry. */
635 if (skb_shinfo(skb)->frag_list) {
636 int first_len = skb_pagelen(skb);
638 if (first_len - hlen > mtu ||
639 ((first_len - hlen) & 7) ||
643 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
644 /* Correct geometry. */
645 if (frag->len > mtu ||
646 ((frag->len & 7) && frag->next) ||
647 skb_headroom(frag) < hlen)
650 /* Partially cloned skb? */
651 if (skb_shared(frag))
658 frag->destructor = sock_wfree;
659 skb->truesize -= frag->truesize;
665 frag = skb_shinfo(skb)->frag_list;
666 skb_shinfo(skb)->frag_list = NULL;
/* Insert the fragment header into the first fragment. */
669 *prevhdr = NEXTHDR_FRAGMENT;
670 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
672 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
676 __skb_pull(skb, hlen);
677 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
678 __skb_push(skb, hlen);
679 skb_reset_network_header(skb);
680 memcpy(skb_network_header(skb), tmp_hdr, hlen);
682 ipv6_select_ident(skb, fh);
683 fh->nexthdr = nexthdr;
685 fh->frag_off = htons(IP6_MF);
686 frag_id = fh->identification;
688 first_len = skb_pagelen(skb);
689 skb->data_len = first_len - skb_headlen(skb);
690 skb->len = first_len;
691 ipv6_hdr(skb)->payload_len = htons(first_len -
692 sizeof(struct ipv6hdr));
694 dst_hold(&rt->u.dst);
697 /* Prepare header of the next frame,
698 * before previous one went down. */
700 frag->ip_summed = CHECKSUM_NONE;
701 skb_reset_transport_header(frag);
702 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
703 __skb_push(frag, hlen);
704 skb_reset_network_header(frag);
705 memcpy(skb_network_header(frag), tmp_hdr,
707 offset += skb->len - hlen - sizeof(struct frag_hdr);
708 fh->nexthdr = nexthdr;
710 fh->frag_off = htons(offset);
711 if (frag->next != NULL)
712 fh->frag_off |= htons(IP6_MF);
713 fh->identification = frag_id;
714 ipv6_hdr(frag)->payload_len =
716 sizeof(struct ipv6hdr));
717 ip6_copy_metadata(frag, skb);
722 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
735 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
736 dst_release(&rt->u.dst);
746 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
747 dst_release(&rt->u.dst);
/* Slow path: allocate new skbs and copy data block by block. */
752 left = skb->len - hlen; /* Space per frame */
753 ptr = hlen; /* Where to start from */
756 * Fragment the datagram.
759 *prevhdr = NEXTHDR_FRAGMENT;
762 * Keep copying data until we run out.
766 /* IF: it doesn't fit, use 'mtu' - the data space left */
769 /* IF: we are not sending upto and including the packet end
770 then align the next start on an eight byte boundary */
778 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
779 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
780 IP6_INC_STATS(ip6_dst_idev(skb->dst),
781 IPSTATS_MIB_FRAGFAILS);
787 * Set up data on packet
790 ip6_copy_metadata(frag, skb);
791 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
792 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
793 skb_reset_network_header(frag);
794 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
795 frag->transport_header = (frag->network_header + hlen +
796 sizeof(struct frag_hdr));
799 * Charge the memory for the fragment to any owner
803 skb_set_owner_w(frag, skb->sk);
806 * Copy the packet header into the new buffer.
808 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
811 * Build fragment header.
813 fh->nexthdr = nexthdr;
/* Identification chosen once (first fragment) and reused afterwards. */
816 ipv6_select_ident(skb, fh);
817 frag_id = fh->identification;
819 fh->identification = frag_id;
822 * Copy a block of the IP datagram.
824 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
828 fh->frag_off = htons(offset);
830 fh->frag_off |= htons(IP6_MF);
831 ipv6_hdr(frag)->payload_len = htons(frag->len -
832 sizeof(struct ipv6hdr));
838 * Put this fragment into the sending queue.
844 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
846 IP6_INC_STATS(ip6_dst_idev(skb->dst),
847 IPSTATS_MIB_FRAGOKS);
852 IP6_INC_STATS(ip6_dst_idev(skb->dst),
853 IPSTATS_MIB_FRAGFAILS);
858 static inline int ip6_rt_check(struct rt6key *rt_key,
859 struct in6_addr *fl_addr,
860 struct in6_addr *addr_cache)
862 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
863 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
/*
 * ip6_sk_dst_check - validate a dst cached on the socket against flow @fl.
 * Releases and NULLs the dst when the destination (and, with subtrees,
 * the source) no longer matches, or when the flow's oif differs from the
 * route's device.
 * NOTE(review): the NULL-dst early return and the release/return lines
 * are missing from this extract.
 */
866 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
867 struct dst_entry *dst,
870 struct ipv6_pinfo *np = inet6_sk(sk);
871 struct rt6_info *rt = (struct rt6_info *)dst;
876 /* Yes, checking route validity in not connected
877 * case is not very simple. Take into account,
878 * that we do not support routing by source, TOS,
879 * and MSG_DONTROUTE --ANK (980726)
881 * 1. ip6_rt_check(): If route was host route,
882 * check that cached destination is current.
883 * If it is network route, we still may
884 * check its validity using saved pointer
885 * to the last used address: daddr_cache.
886 * We do not want to save whole address now,
887 * (because main consumer of this service
888 * is tcp, which has not this problem),
889 * so that the last trick works only on connected
891 * 2. oif also should be the same.
893 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
894 #ifdef CONFIG_IPV6_SUBTREES
895 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
897 (fl->oif && fl->oif != dst->dev->ifindex)) {
/*
 * ip6_dst_lookup_tail - resolve a route for @fl into *dst, selecting a
 * source address when the flow has none.  With optimistic DAD, a dst whose
 * neighbour is not yet valid and whose source is an optimistic address is
 * replaced by the route to the default router.
 * NOTE(review): several lines (ifp release, redirect branch structure,
 * the success return, out_err_release label body) are missing from this
 * extract.
 */
906 static int ip6_dst_lookup_tail(struct sock *sk,
907 struct dst_entry **dst, struct flowi *fl)
912 *dst = ip6_route_output(sk, fl);
914 if ((err = (*dst)->error))
915 goto out_err_release;
917 if (ipv6_addr_any(&fl->fl6_src)) {
918 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
920 goto out_err_release;
923 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
925 * Here if the dst entry we've looked up
926 * has a neighbour entry that is in the INCOMPLETE
927 * state and the src address from the flow is
928 * marked as OPTIMISTIC, we release the found
929 * dst entry and replace it instead with the
930 * dst entry of the nexthop router
932 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
933 struct inet6_ifaddr *ifp;
937 ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
939 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
945 * We need to get the dst entry for the
946 * default router instead
949 memcpy(&fl_gw, fl, sizeof(struct flowi));
950 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
951 *dst = ip6_route_output(sk, &fl_gw);
952 if ((err = (*dst)->error))
953 goto out_err_release;
961 if (err == -ENETUNREACH)
962 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
969 * ip6_dst_lookup - perform route lookup on flow
970 * @sk: socket which provides route info
971 * @dst: pointer to dst_entry * for result
972 * @fl: flow to lookup
974 * This function performs a route lookup on the given flow.
976 * It returns zero on success, or a standard errno code on error.
978 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
981 return ip6_dst_lookup_tail(sk, dst, fl);
983 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
986 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
987 * @sk: socket which provides the dst cache and route info
988 * @dst: pointer to dst_entry * for result
989 * @fl: flow to lookup
991 * This function performs a route lookup on the given flow with the
992 * possibility of using the cached route in the socket if it is valid.
993 * It will take the socket dst lock when operating on the dst cache.
994 * As a result, this function can only be used in process context.
996 * It returns zero on success, or a standard errno code on error.
998 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
/* Try the socket's cached dst first; ip6_sk_dst_check() drops it if it
 * no longer matches the flow, in which case the tail lookup recomputes. */
1002 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1003 *dst = ip6_sk_dst_check(sk, *dst, fl);
1006 return ip6_dst_lookup_tail(sk, dst, fl);
1008 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
/*
 * ip6_ufo_append_data - UDP fragmentation offload path for ip6_append_data.
 * Builds (or extends) one large skb whose payload lives in page fragments;
 * gso_size tells the device how to slice it, and the fragment ID is chosen
 * here so all on-wire fragments share it.
 * NOTE(review): error-path lines and the function's returns are missing
 * from this extract.
 */
1010 static inline int ip6_ufo_append_data(struct sock *sk,
1011 int getfrag(void *from, char *to, int offset, int len,
1012 int odd, struct sk_buff *skb),
1013 void *from, int length, int hh_len, int fragheaderlen,
1014 int transhdrlen, int mtu,unsigned int flags)
1017 struct sk_buff *skb;
1020 /* There is support for UDP large send offload by network
1021 * device, so create one single skb packet containing complete
1024 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1025 skb = sock_alloc_send_skb(sk,
1026 hh_len + fragheaderlen + transhdrlen + 20,
1027 (flags & MSG_DONTWAIT), &err);
1031 /* reserve space for Hardware header */
1032 skb_reserve(skb, hh_len);
1034 /* create space for UDP/IP header */
1035 skb_put(skb,fragheaderlen + transhdrlen);
1037 /* initialize network header pointer */
1038 skb_reset_network_header(skb);
1040 /* initialize protocol header pointer */
1041 skb->transport_header = skb->network_header + fragheaderlen;
1043 skb->ip_summed = CHECKSUM_PARTIAL;
1045 sk->sk_sndmsg_off = 0;
/* Payload beyond the transport header goes into page frags. */
1048 err = skb_append_datato_frags(sk,skb, getfrag, from,
1049 (length - transhdrlen));
1051 struct frag_hdr fhdr;
1053 /* specify the length of each IP datagram fragment*/
1054 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1055 sizeof(struct frag_hdr);
1056 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1057 ipv6_select_ident(skb, &fhdr);
1058 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1059 __skb_queue_tail(&sk->sk_write_queue, skb);
1063 /* There is not enough support do UPD LSO,
1064 * so follow normal path
/*
 * ip6_append_data - append user data to the socket's pending (corked)
 * output queue, building MTU-sized skbs as needed.  First call on an empty
 * queue snapshots options/flow/route/MTU into the cork; subsequent calls
 * reuse them.  Data is pulled via @getfrag.  UFO-capable devices take the
 * ip6_ufo_append_data() shortcut for oversized UDP.
 * NOTE(review): this extract omits many original lines (declarations,
 * braces, error labels, checksum/fraggap details); comments below only
 * describe visible code.
 */
1071 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1072 int offset, int len, int odd, struct sk_buff *skb),
1073 void *from, int length, int transhdrlen,
1074 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1075 struct rt6_info *rt, unsigned int flags)
1077 struct inet_sock *inet = inet_sk(sk);
1078 struct ipv6_pinfo *np = inet6_sk(sk);
1079 struct sk_buff *skb;
1080 unsigned int maxfraglen, fragheaderlen;
1087 int csummode = CHECKSUM_NONE;
1089 if (flags&MSG_PROBE)
/* First append on an empty queue: set up the cork state. */
1091 if (skb_queue_empty(&sk->sk_write_queue)) {
1096 if (np->cork.opt == NULL) {
1097 np->cork.opt = kmalloc(opt->tot_len,
1099 if (unlikely(np->cork.opt == NULL))
1101 } else if (np->cork.opt->tot_len < opt->tot_len) {
1102 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1105 memcpy(np->cork.opt, opt, opt->tot_len);
1106 inet->cork.flags |= IPCORK_OPT;
1107 /* need source address above miyazawa*/
1109 dst_hold(&rt->u.dst);
1111 inet->cork.fl = *fl;
1112 np->cork.hop_limit = hlimit;
1113 np->cork.tclass = tclass;
1114 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1115 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1116 if (np->frag_size < mtu) {
1118 mtu = np->frag_size;
1120 inet->cork.fragsize = mtu;
1121 if (dst_allfrag(rt->u.dst.path))
1122 inet->cork.flags |= IPCORK_ALLFRAG;
1123 inet->cork.length = 0;
1124 sk->sk_sndmsg_page = NULL;
1125 sk->sk_sndmsg_off = 0;
1126 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1128 length += exthdrlen;
1129 transhdrlen += exthdrlen;
/* Queue not empty: reuse the corked flow/options/MTU. */
1132 fl = &inet->cork.fl;
1133 if (inet->cork.flags & IPCORK_OPT)
1137 mtu = inet->cork.fragsize;
1140 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1142 fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
1143 (opt ? opt->opt_nflen : 0);
1144 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
/* Enforce the IPv6 maximum payload limit for this datagram. */
1146 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1147 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1148 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1154 * Let's try using as much space as possible.
1155 * Use MTU if total length of the message fits into the MTU.
1156 * Otherwise, we need to reserve fragment header and
1157 * fragment alignment (= 8-15 octets, in total).
1159 * Note that we may need to "move" the data from the tail of
1160 * of the buffer to the new fragment when we split
1163 * FIXME: It may be fragmented into multiple chunks
1164 * at once if non-fragmentable extension headers
1169 inet->cork.length += length;
/* Oversized UDP on a UFO-capable device: offload fragmentation. */
1170 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1171 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1173 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1174 fragheaderlen, transhdrlen, mtu,
1181 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1184 while (length > 0) {
1185 /* Check if the remaining data fits into current packet. */
1186 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1188 copy = maxfraglen - skb->len;
1192 unsigned int datalen;
1193 unsigned int fraglen;
1194 unsigned int fraggap;
1195 unsigned int alloclen;
1196 struct sk_buff *skb_prev;
1200 /* There's no room in the current skb */
1202 fraggap = skb_prev->len - maxfraglen;
1207 * If remaining data exceeds the mtu,
1208 * we know we need more fragment(s).
1210 datalen = length + fraggap;
1211 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1212 datalen = maxfraglen - fragheaderlen;
1214 fraglen = datalen + fragheaderlen;
1215 if ((flags & MSG_MORE) &&
1216 !(rt->u.dst.dev->features&NETIF_F_SG))
1219 alloclen = datalen + fragheaderlen;
1222 * The last fragment gets additional space at tail.
1223 * Note: we overallocate on fragments with MSG_MORE
1224 * because we have no idea if we're the last one.
1226 if (datalen == length + fraggap)
1227 alloclen += rt->u.dst.trailer_len;
1230 * We just reserve space for fragment header.
1231 * Note: this may be overallocation if the message
1232 * (without MSG_MORE) fits into the MTU.
1234 alloclen += sizeof(struct frag_hdr);
1237 skb = sock_alloc_send_skb(sk,
1239 (flags & MSG_DONTWAIT), &err);
1242 if (atomic_read(&sk->sk_wmem_alloc) <=
1244 skb = sock_wmalloc(sk,
1245 alloclen + hh_len, 1,
1247 if (unlikely(skb == NULL))
1253 * Fill in the control structures
1255 skb->ip_summed = csummode;
1257 /* reserve for fragmentation */
1258 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1261 * Find where to start putting bytes
1263 data = skb_put(skb, fraglen);
1264 skb_set_network_header(skb, exthdrlen);
1265 data += fragheaderlen;
1266 skb->transport_header = (skb->network_header +
/* Move the tail of the previous skb (fraggap) into this one so the
 * previous fragment ends on an 8-byte boundary. */
1269 skb->csum = skb_copy_and_csum_bits(
1270 skb_prev, maxfraglen,
1271 data + transhdrlen, fraggap, 0);
1272 skb_prev->csum = csum_sub(skb_prev->csum,
1275 pskb_trim_unique(skb_prev, maxfraglen);
1277 copy = datalen - transhdrlen - fraggap;
1282 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1289 length -= datalen - fraggap;
/* Only the first skb can claim hardware checksum coverage. */
1292 csummode = CHECKSUM_NONE;
1295 * Put the packet on the pending queue
1297 __skb_queue_tail(&sk->sk_write_queue, skb);
/* Non-SG device: copy into the linear area of the current skb. */
1304 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1308 if (getfrag(from, skb_put(skb, copy),
1309 offset, copy, off, skb) < 0) {
1310 __skb_trim(skb, off);
/* SG device: append into page fragments, reusing the socket's
 * current sndmsg page when it still has room. */
1315 int i = skb_shinfo(skb)->nr_frags;
1316 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1317 struct page *page = sk->sk_sndmsg_page;
1318 int off = sk->sk_sndmsg_off;
1321 if (page && (left = PAGE_SIZE - off) > 0) {
1324 if (page != frag->page) {
1325 if (i == MAX_SKB_FRAGS) {
1330 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1331 frag = &skb_shinfo(skb)->frags[i];
1333 } else if(i < MAX_SKB_FRAGS) {
1334 if (copy > PAGE_SIZE)
1336 page = alloc_pages(sk->sk_allocation, 0);
1341 sk->sk_sndmsg_page = page;
1342 sk->sk_sndmsg_off = 0;
1344 skb_fill_page_desc(skb, i, page, 0, 0);
1345 frag = &skb_shinfo(skb)->frags[i];
1350 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1354 sk->sk_sndmsg_off += copy;
1357 skb->data_len += copy;
1358 skb->truesize += copy;
1359 atomic_add(copy, &sk->sk_wmem_alloc);
/* Error path: undo the length accounting done before the loop. */
1366 inet->cork.length -= length;
1367 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1371 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1373 inet->cork.flags &= ~IPCORK_OPT;
1374 kfree(np->cork.opt);
1375 np->cork.opt = NULL;
1377 dst_release(&np->cork.rt->u.dst);
1379 inet->cork.flags &= ~IPCORK_ALLFRAG;
1381 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_push_pending_frames - coalesce the socket's pending queue into one
 * skb (extra skbs become the frag_list), prepend extension headers and the
 * IPv6 header from the cork state, and send it via ip6_local_out().
 * NOTE(review): some original lines (error label, ICMP stats braces,
 * final return, cork release on error) are missing from this extract.
 */
1384 int ip6_push_pending_frames(struct sock *sk)
1386 struct sk_buff *skb, *tmp_skb;
1387 struct sk_buff **tail_skb;
1388 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1389 struct inet_sock *inet = inet_sk(sk);
1390 struct ipv6_pinfo *np = inet6_sk(sk);
1391 struct ipv6hdr *hdr;
1392 struct ipv6_txoptions *opt = np->cork.opt;
1393 struct rt6_info *rt = np->cork.rt;
1394 struct flowi *fl = &inet->cork.fl;
1395 unsigned char proto = fl->proto;
1398 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1400 tail_skb = &(skb_shinfo(skb)->frag_list);
1402 /* move skb->data to ip header from ext header */
1403 if (skb->data < skb_network_header(skb))
1404 __skb_pull(skb, skb_network_offset(skb));
/* Chain the remaining queued skbs onto the first one's frag_list,
 * transferring their byte counts and dropping their socket refs. */
1405 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1406 __skb_pull(tmp_skb, skb_network_header_len(skb));
1407 *tail_skb = tmp_skb;
1408 tail_skb = &(tmp_skb->next);
1409 skb->len += tmp_skb->len;
1410 skb->data_len += tmp_skb->len;
1411 skb->truesize += tmp_skb->truesize;
1412 __sock_put(tmp_skb->sk);
1413 tmp_skb->destructor = NULL;
1417 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1418 __skb_pull(skb, skb_network_header_len(skb));
1419 if (opt && opt->opt_flen)
1420 ipv6_push_frag_opts(skb, opt, &proto);
1421 if (opt && opt->opt_nflen)
1422 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1424 skb_push(skb, sizeof(struct ipv6hdr));
1425 skb_reset_network_header(skb);
1426 hdr = ipv6_hdr(skb);
/* Version 6, corked traffic class, flow label from the corked flow. */
1428 *(__be32*)hdr = fl->fl6_flowlabel |
1429 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1431 hdr->hop_limit = np->cork.hop_limit;
1432 hdr->nexthdr = proto;
1433 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1434 ipv6_addr_copy(&hdr->daddr, final_dst);
1436 skb->priority = sk->sk_priority;
1438 skb->dst = dst_clone(&rt->u.dst);
1439 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1440 if (proto == IPPROTO_ICMPV6) {
1441 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1443 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1444 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1447 err = ip6_local_out(skb);
/* Positive netfilter/xmit codes are not errors for the caller. */
1450 err = np->recverr ? net_xmit_errno(err) : 0;
1456 ip6_cork_release(inet, np);
1462 void ip6_flush_pending_frames(struct sock *sk)
1464 struct sk_buff *skb;
1466 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1468 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1469 IPSTATS_MIB_OUTDISCARDS);
1473 ip6_cork_release(inet_sk(sk), inet6_sk(sk));