err.no Git - linux-2.6/blob - net/ipv6/route.c
[IPV6]: ROUTE: Try selecting better route for non-default routes as well.
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() expand to
 * printk() calls only when RT6_DEBUG is 3 or higher; at any lower value
 * (including the current one) they compile to nothing.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone a matched
 * route that already has a nexthop (or is RTF_NONEXTHOP) with
 * rt6_alloc_clone(); when zero they return the matched route itself.
 */
75 #define CLONE_OFFLINK_ROUTE 0
76
/*
 * Bit flags accepted in the "strict" argument of rt6_select() and
 * rt6_score_route():
 *   RT6_SELECT_F_IFACE     - reject routes whose device does not match oif
 *   RT6_SELECT_F_REACHABLE - reject routes whose nexthop neighbour is
 *                            missing or not in a NUD_VALID state
 */
77 #define RT6_SELECT_F_IFACE      0x1
78 #define RT6_SELECT_F_REACHABLE  0x2
79
/*
 * Tunables for route garbage collection and MTU handling.  Time values
 * are expressed in jiffies (multiples of HZ).
 */
/* Entry count above which ip6_dst_gc() always runs and reports pressure. */
80 static int ip6_rt_max_size = 4096;
/* Minimum spacing between GC runs in ip6_dst_gc() while below max_size. */
81 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
82 static int ip6_rt_gc_timeout = 60*HZ;
/* Not referenced in this part of the file; non-static, so presumably used elsewhere -- TODO confirm. */
83 int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry on every call. */
84 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in this part of the file; presumably the lifetime of learned path MTUs -- TODO confirm. */
85 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(); the 20 and 40 presumably stand for TCP and IPv6 header sizes -- TODO confirm. */
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
/*
 * Forward declarations: the route copy helper used by the clone/COW
 * allocators, the callbacks installed in ip6_dst_ops, and the discard
 * handlers installed in ip6_null_entry.
 */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
/*
 * Destination-cache operations for IPv6 routes.  Every rt6_info is
 * allocated against this table (see ip6_dst_alloc()), so entry_size is
 * the size of a full rt6_info.  gc_thresh is the entry count below which
 * ip6_dst_gc() resets its expiry interval.
 */
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
/*
 * Sentinel "no route" entry.  rt6_select() and rt6_device_match() return
 * its address when nothing matches, and callers compare against
 * &ip6_null_entry to detect that case.  It is a reject route bound to the
 * loopback device: both input and output discard the packet, the dst
 * error is -ENETUNREACH, and the metric is the largest u32 value.  The
 * initial reference counts are 1, and .path points back at the entry
 * itself.
 */
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
/*
 * Root node of the IPv6 FIB tree; every fib6_lookup()/fib6_add() call in
 * this file starts here.  Its initial leaf is the null entry and it is
 * flagged as a root, which is what BACKTRACK() tests for to stop walking
 * up the tree.
 */
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
/*
 * Reader/writer lock for the whole IPv6 FIB.  In this file lookups take
 * it with read_lock_bh(); ip6_ins_rt() and ndisc_dst_alloc() take it
 * with write_lock_bh().
 */
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254         if (rt6_check_neigh(rt))
255                 m |= 4;
256         else if (strict & RT6_SELECT_F_REACHABLE)
257                 return -1;
258         return m;
259 }
260
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262                                    int strict)
263 {
264         struct rt6_info *match = NULL, *last = NULL;
265         struct rt6_info *rt, *rt0 = *head;
266         u32 metric;
267         int mpri = -1;
268
269         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270                   __FUNCTION__, head, head ? *head : NULL, oif);
271
272         for (rt = rt0, metric = rt0->rt6i_metric;
273              rt && rt->rt6i_metric == metric;
274              rt = rt->u.next) {
275                 int m;
276
277                 if (rt6_check_expired(rt))
278                         continue;
279
280                 last = rt;
281
282                 m = rt6_score_route(rt, oif, strict);
283                 if (m < 0)
284                         continue;
285
286                 if (m > mpri) {
287                         match = rt;
288                         mpri = m;
289                 }
290         }
291
292         if (!match &&
293             (strict & RT6_SELECT_F_REACHABLE) &&
294             last && last != rt0) {
295                 /* no entries matched; do round-robin */
296                 *head = rt0->u.next;
297                 rt0->u.next = last->u.next;
298                 last->u.next = rt0;
299         }
300
301         RT6_TRACE("%s() => %p, score=%d\n",
302                   __FUNCTION__, match, mpri);
303
304         return (match ? match : &ip6_null_entry);
305 }
306
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308                             int oif, int strict)
309 {
310         struct fib6_node *fn;
311         struct rt6_info *rt;
312
313         read_lock_bh(&rt6_lock);
314         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315         rt = rt6_device_match(fn->leaf, oif, strict);
316         dst_hold(&rt->u.dst);
317         rt->u.dst.__use++;
318         read_unlock_bh(&rt6_lock);
319
320         rt->u.dst.lastuse = jiffies;
321         if (rt->u.dst.error == 0)
322                 return rt;
323         dst_release(&rt->u.dst);
324         return NULL;
325 }
326
327 /* ip6_ins_rt is called with FREE rt6_lock.
328    It takes new route entry, the addition fails by any reason the
329    route is freed. In any case, if caller does not hold it, it may
330    be destroyed.
331  */
332
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334                 void *_rtattr, struct netlink_skb_parms *req)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346                                       struct in6_addr *saddr)
347 {
348         struct rt6_info *rt;
349
350         /*
351          *      Clone the route.
352          */
353
354         rt = ip6_rt_copy(ort);
355
356         if (rt) {
357                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358                         if (rt->rt6i_dst.plen != 128 &&
359                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360                                 rt->rt6i_flags |= RTF_ANYCAST;
361                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
362                 }
363
364                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365                 rt->rt6i_dst.plen = 128;
366                 rt->rt6i_flags |= RTF_CACHE;
367                 rt->u.dst.flags |= DST_HOST;
368
369 #ifdef CONFIG_IPV6_SUBTREES
370                 if (rt->rt6i_src.plen && saddr) {
371                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372                         rt->rt6i_src.plen = 128;
373                 }
374 #endif
375
376                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
378         }
379
380         return rt;
381 }
382
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384 {
385         struct rt6_info *rt = ip6_rt_copy(ort);
386         if (rt) {
387                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388                 rt->rt6i_dst.plen = 128;
389                 rt->rt6i_flags |= RTF_CACHE;
390                 if (rt->rt6i_flags & RTF_REJECT)
391                         rt->u.dst.error = ort->u.dst.error;
392                 rt->u.dst.flags |= DST_HOST;
393                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394         }
395         return rt;
396 }
397
/*
 * BACKTRACK() - retry a strict lookup further up the FIB tree.
 *
 * Must be expanded inside a function that has the locals `rt`, `fn` and
 * `strict` in scope and defines the labels `out` and `restart`.
 *
 * If the selection produced the null entry and `strict` is non-zero,
 * walk from `fn` towards the root: jump to `out` on reaching a node
 * flagged RTN_ROOT, or to `restart` on the first ancestor that carries
 * route information (RTN_RTINFO).  Otherwise fall through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and can be used safely in an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) \
				goto out; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
408
409
410 void ip6_route_input(struct sk_buff *skb)
411 {
412         struct fib6_node *fn;
413         struct rt6_info *rt, *nrt;
414         int strict;
415         int attempts = 3;
416         int err;
417
418         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
419
420 relookup:
421         read_lock_bh(&rt6_lock);
422
423         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424                          &skb->nh.ipv6h->saddr);
425
426 restart:
427         rt = fn->leaf;
428
429         if ((rt->rt6i_flags & RTF_CACHE)) {
430                 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
431                 if (rt == &ip6_null_entry)
432                         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
433                 BACKTRACK();
434                 goto out;
435         }
436
437         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
438         if (rt == &ip6_null_entry)
439                 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
440         BACKTRACK();
441
442         dst_hold(&rt->u.dst);
443         read_unlock_bh(&rt6_lock);
444
445         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
446                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
447         else {
448 #if CLONE_OFFLINK_ROUTE
449                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
450 #else
451                 goto out2;
452 #endif
453         }
454
455         dst_release(&rt->u.dst);
456         rt = nrt ? : &ip6_null_entry;
457
458         dst_hold(&rt->u.dst);
459         if (nrt) {
460                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
461                 if (!err)
462                         goto out2;
463         }
464
465         if (--attempts <= 0)
466                 goto out2;
467
468         /*
469          * Race condition! In the gap, when rt6_lock was
470          * released someone could insert this route.  Relookup.
471          */
472         dst_release(&rt->u.dst);
473         goto relookup;
474
475 out:
476         dst_hold(&rt->u.dst);
477         read_unlock_bh(&rt6_lock);
478 out2:
479         rt->u.dst.lastuse = jiffies;
480         rt->u.dst.__use++;
481         skb->dst = (struct dst_entry *) rt;
482         return;
483 }
484
485 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
486 {
487         struct fib6_node *fn;
488         struct rt6_info *rt, *nrt;
489         int strict;
490         int attempts = 3;
491         int err;
492
493         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
494
495 relookup:
496         read_lock_bh(&rt6_lock);
497
498         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
499
500 restart:
501         rt = fn->leaf;
502
503         if ((rt->rt6i_flags & RTF_CACHE)) {
504                 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
505                 if (rt == &ip6_null_entry)
506                         rt = rt6_select(&fn->leaf, fl->oif, strict);
507                 BACKTRACK();
508                 goto out;
509         }
510         if (rt->rt6i_flags & RTF_DEFAULT) {
511                 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
512                 if (rt == &ip6_null_entry)
513                         rt = rt6_select(&fn->leaf, fl->oif, strict);
514         } else {
515                 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
516                 if (rt == &ip6_null_entry)
517                         rt = rt6_select(&fn->leaf, fl->oif, strict);
518                 BACKTRACK();
519         }
520
521         dst_hold(&rt->u.dst);
522         read_unlock_bh(&rt6_lock);
523
524         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
525                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
526         else {
527 #if CLONE_OFFLINK_ROUTE
528                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
529 #else
530                 goto out2;
531 #endif
532         }
533
534         dst_release(&rt->u.dst);
535         rt = nrt ? : &ip6_null_entry;
536
537         dst_hold(&rt->u.dst);
538         if (nrt) {
539                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
540                 if (!err)
541                         goto out2;
542         }
543
544         if (--attempts <= 0)
545                 goto out2;
546
547         /*
548          * Race condition! In the gap, when rt6_lock was
549          * released someone could insert this route.  Relookup.
550          */
551         dst_release(&rt->u.dst);
552         goto relookup;
553
554 out:
555         dst_hold(&rt->u.dst);
556         read_unlock_bh(&rt6_lock);
557 out2:
558         rt->u.dst.lastuse = jiffies;
559         rt->u.dst.__use++;
560         return &rt->u.dst;
561 }
562
563
564 /*
565  *      Destination cache support functions
566  */
567
568 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
569 {
570         struct rt6_info *rt;
571
572         rt = (struct rt6_info *) dst;
573
574         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
575                 return dst;
576
577         return NULL;
578 }
579
580 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
581 {
582         struct rt6_info *rt = (struct rt6_info *) dst;
583
584         if (rt) {
585                 if (rt->rt6i_flags & RTF_CACHE)
586                         ip6_del_rt(rt, NULL, NULL, NULL);
587                 else
588                         dst_release(dst);
589         }
590         return NULL;
591 }
592
593 static void ip6_link_failure(struct sk_buff *skb)
594 {
595         struct rt6_info *rt;
596
597         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
598
599         rt = (struct rt6_info *) skb->dst;
600         if (rt) {
601                 if (rt->rt6i_flags&RTF_CACHE) {
602                         dst_set_expires(&rt->u.dst, 0);
603                         rt->rt6i_flags |= RTF_EXPIRES;
604                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
605                         rt->rt6i_node->fn_sernum = -1;
606         }
607 }
608
609 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
610 {
611         struct rt6_info *rt6 = (struct rt6_info*)dst;
612
613         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
614                 rt6->rt6i_flags |= RTF_MODIFIED;
615                 if (mtu < IPV6_MIN_MTU) {
616                         mtu = IPV6_MIN_MTU;
617                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
618                 }
619                 dst->metrics[RTAX_MTU-1] = mtu;
620         }
621 }
622
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc().  ndisc_dst_gc() walks it and frees entries whose
 * reference count has dropped to zero.
 */
623 /* Protected by rt6_lock.  */
624 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; defined further down in this file. */
625 static int ipv6_get_mtu(struct net_device *dev);
626
627 static inline unsigned int ipv6_advmss(unsigned int mtu)
628 {
629         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
630
631         if (mtu < ip6_rt_min_advmss)
632                 mtu = ip6_rt_min_advmss;
633
634         /*
635          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
636          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
637          * IPV6_MAXPLEN is also valid and means: "any MSS, 
638          * rely only on pmtu discovery"
639          */
640         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
641                 mtu = IPV6_MAXPLEN;
642         return mtu;
643 }
644
645 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
646                                   struct neighbour *neigh,
647                                   struct in6_addr *addr,
648                                   int (*output)(struct sk_buff *))
649 {
650         struct rt6_info *rt;
651         struct inet6_dev *idev = in6_dev_get(dev);
652
653         if (unlikely(idev == NULL))
654                 return NULL;
655
656         rt = ip6_dst_alloc();
657         if (unlikely(rt == NULL)) {
658                 in6_dev_put(idev);
659                 goto out;
660         }
661
662         dev_hold(dev);
663         if (neigh)
664                 neigh_hold(neigh);
665         else
666                 neigh = ndisc_get_neigh(dev, addr);
667
668         rt->rt6i_dev      = dev;
669         rt->rt6i_idev     = idev;
670         rt->rt6i_nexthop  = neigh;
671         atomic_set(&rt->u.dst.__refcnt, 1);
672         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
673         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
674         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
675         rt->u.dst.output  = output;
676
677 #if 0   /* there's no chance to use these for ndisc */
678         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
679                                 ? DST_HOST 
680                                 : 0;
681         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
682         rt->rt6i_dst.plen = 128;
683 #endif
684
685         write_lock_bh(&rt6_lock);
686         rt->u.dst.next = ndisc_dst_gc_list;
687         ndisc_dst_gc_list = &rt->u.dst;
688         write_unlock_bh(&rt6_lock);
689
690         fib6_force_start_gc();
691
692 out:
693         return (struct dst_entry *)rt;
694 }
695
696 int ndisc_dst_gc(int *more)
697 {
698         struct dst_entry *dst, *next, **pprev;
699         int freed;
700
701         next = NULL;
702         pprev = &ndisc_dst_gc_list;
703         freed = 0;
704         while ((dst = *pprev) != NULL) {
705                 if (!atomic_read(&dst->__refcnt)) {
706                         *pprev = dst->next;
707                         dst_free(dst);
708                         freed++;
709                 } else {
710                         pprev = &dst->next;
711                         (*more)++;
712                 }
713         }
714
715         return freed;
716 }
717
718 static int ip6_dst_gc(void)
719 {
720         static unsigned expire = 30*HZ;
721         static unsigned long last_gc;
722         unsigned long now = jiffies;
723
724         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
725             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
726                 goto out;
727
728         expire++;
729         fib6_run_gc(expire);
730         last_gc = now;
731         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
732                 expire = ip6_rt_gc_timeout>>1;
733
734 out:
735         expire -= expire>>ip6_rt_gc_elasticity;
736         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
737 }
738
739 /* Clean host part of a prefix. Not necessary in radix tree,
740    but results in cleaner routing tables.
741
742    Remove it only when all the things will work!
743  */
744
745 static int ipv6_get_mtu(struct net_device *dev)
746 {
747         int mtu = IPV6_MIN_MTU;
748         struct inet6_dev *idev;
749
750         idev = in6_dev_get(dev);
751         if (idev) {
752                 mtu = idev->cnf.mtu6;
753                 in6_dev_put(idev);
754         }
755         return mtu;
756 }
757
758 int ipv6_get_hoplimit(struct net_device *dev)
759 {
760         int hoplimit = ipv6_devconf.hop_limit;
761         struct inet6_dev *idev;
762
763         idev = in6_dev_get(dev);
764         if (idev) {
765                 hoplimit = idev->cnf.hop_limit;
766                 in6_dev_put(idev);
767         }
768         return hoplimit;
769 }
770
771 /*
772  *
773  */
774
775 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
776                 void *_rtattr, struct netlink_skb_parms *req)
777 {
778         int err;
779         struct rtmsg *r;
780         struct rtattr **rta;
781         struct rt6_info *rt = NULL;
782         struct net_device *dev = NULL;
783         struct inet6_dev *idev = NULL;
784         int addr_type;
785
786         rta = (struct rtattr **) _rtattr;
787
788         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
789                 return -EINVAL;
790 #ifndef CONFIG_IPV6_SUBTREES
791         if (rtmsg->rtmsg_src_len)
792                 return -EINVAL;
793 #endif
794         if (rtmsg->rtmsg_ifindex) {
795                 err = -ENODEV;
796                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
797                 if (!dev)
798                         goto out;
799                 idev = in6_dev_get(dev);
800                 if (!idev)
801                         goto out;
802         }
803
804         if (rtmsg->rtmsg_metric == 0)
805                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
806
807         rt = ip6_dst_alloc();
808
809         if (rt == NULL) {
810                 err = -ENOMEM;
811                 goto out;
812         }
813
814         rt->u.dst.obsolete = -1;
815         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
816         if (nlh && (r = NLMSG_DATA(nlh))) {
817                 rt->rt6i_protocol = r->rtm_protocol;
818         } else {
819                 rt->rt6i_protocol = RTPROT_BOOT;
820         }
821
822         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
823
824         if (addr_type & IPV6_ADDR_MULTICAST)
825                 rt->u.dst.input = ip6_mc_input;
826         else
827                 rt->u.dst.input = ip6_forward;
828
829         rt->u.dst.output = ip6_output;
830
831         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
832                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
833         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
834         if (rt->rt6i_dst.plen == 128)
835                rt->u.dst.flags = DST_HOST;
836
837 #ifdef CONFIG_IPV6_SUBTREES
838         ipv6_addr_prefix(&rt->rt6i_src.addr, 
839                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
840         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
841 #endif
842
843         rt->rt6i_metric = rtmsg->rtmsg_metric;
844
845         /* We cannot add true routes via loopback here,
846            they would result in kernel looping; promote them to reject routes
847          */
848         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
849             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
850                 /* hold loopback dev/idev if we haven't done so. */
851                 if (dev != &loopback_dev) {
852                         if (dev) {
853                                 dev_put(dev);
854                                 in6_dev_put(idev);
855                         }
856                         dev = &loopback_dev;
857                         dev_hold(dev);
858                         idev = in6_dev_get(dev);
859                         if (!idev) {
860                                 err = -ENODEV;
861                                 goto out;
862                         }
863                 }
864                 rt->u.dst.output = ip6_pkt_discard_out;
865                 rt->u.dst.input = ip6_pkt_discard;
866                 rt->u.dst.error = -ENETUNREACH;
867                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
868                 goto install_route;
869         }
870
871         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
872                 struct in6_addr *gw_addr;
873                 int gwa_type;
874
875                 gw_addr = &rtmsg->rtmsg_gateway;
876                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
877                 gwa_type = ipv6_addr_type(gw_addr);
878
879                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
880                         struct rt6_info *grt;
881
882                         /* IPv6 strictly inhibits using not link-local
883                            addresses as nexthop address.
884                            Otherwise, router will not able to send redirects.
885                            It is very good, but in some (rare!) circumstances
886                            (SIT, PtP, NBMA NOARP links) it is handy to allow
887                            some exceptions. --ANK
888                          */
889                         err = -EINVAL;
890                         if (!(gwa_type&IPV6_ADDR_UNICAST))
891                                 goto out;
892
893                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
894
895                         err = -EHOSTUNREACH;
896                         if (grt == NULL)
897                                 goto out;
898                         if (dev) {
899                                 if (dev != grt->rt6i_dev) {
900                                         dst_release(&grt->u.dst);
901                                         goto out;
902                                 }
903                         } else {
904                                 dev = grt->rt6i_dev;
905                                 idev = grt->rt6i_idev;
906                                 dev_hold(dev);
907                                 in6_dev_hold(grt->rt6i_idev);
908                         }
909                         if (!(grt->rt6i_flags&RTF_GATEWAY))
910                                 err = 0;
911                         dst_release(&grt->u.dst);
912
913                         if (err)
914                                 goto out;
915                 }
916                 err = -EINVAL;
917                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
918                         goto out;
919         }
920
921         err = -ENODEV;
922         if (dev == NULL)
923                 goto out;
924
925         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
926                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
927                 if (IS_ERR(rt->rt6i_nexthop)) {
928                         err = PTR_ERR(rt->rt6i_nexthop);
929                         rt->rt6i_nexthop = NULL;
930                         goto out;
931                 }
932         }
933
934         rt->rt6i_flags = rtmsg->rtmsg_flags;
935
936 install_route:
937         if (rta && rta[RTA_METRICS-1]) {
938                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
939                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
940
941                 while (RTA_OK(attr, attrlen)) {
942                         unsigned flavor = attr->rta_type;
943                         if (flavor) {
944                                 if (flavor > RTAX_MAX) {
945                                         err = -EINVAL;
946                                         goto out;
947                                 }
948                                 rt->u.dst.metrics[flavor-1] =
949                                         *(u32 *)RTA_DATA(attr);
950                         }
951                         attr = RTA_NEXT(attr, attrlen);
952                 }
953         }
954
955         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
956                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
957         if (!rt->u.dst.metrics[RTAX_MTU-1])
958                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
959         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
960                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
961         rt->u.dst.dev = dev;
962         rt->rt6i_idev = idev;
963         return ip6_ins_rt(rt, nlh, _rtattr, req);
964
965 out:
966         if (dev)
967                 dev_put(dev);
968         if (idev)
969                 in6_dev_put(idev);
970         if (rt)
971                 dst_free((struct dst_entry *) rt);
972         return err;
973 }
974
975 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
976 {
977         int err;
978
979         write_lock_bh(&rt6_lock);
980
981         err = fib6_del(rt, nlh, _rtattr, req);
982         dst_release(&rt->u.dst);
983
984         write_unlock_bh(&rt6_lock);
985
986         return err;
987 }
988
989 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
990 {
991         struct fib6_node *fn;
992         struct rt6_info *rt;
993         int err = -ESRCH;
994
995         read_lock_bh(&rt6_lock);
996
997         fn = fib6_locate(&ip6_routing_table,
998                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
999                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1000         
1001         if (fn) {
1002                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1003                         if (rtmsg->rtmsg_ifindex &&
1004                             (rt->rt6i_dev == NULL ||
1005                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1006                                 continue;
1007                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1008                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1009                                 continue;
1010                         if (rtmsg->rtmsg_metric &&
1011                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1012                                 continue;
1013                         dst_hold(&rt->u.dst);
1014                         read_unlock_bh(&rt6_lock);
1015
1016                         return ip6_del_rt(rt, nlh, _rtattr, req);
1017                 }
1018         }
1019         read_unlock_bh(&rt6_lock);
1020
1021         return err;
1022 }
1023
1024 /*
1025  *      Handle redirects
1026  */
/*
 * rt6_redirect - act on a redirect concerning destination @dest.
 *
 * dest:    destination the redirect applies to; becomes the /128 key of
 *          the new route
 * saddr:   address compared against the gateway of the current route
 *          (and, for RTF_DEFAULT routes, of every route on the root
 *          node's leaf list) to decide whether the redirect is accepted
 * neigh:   neighbour entry of the new nexthop; its device must be the
 *          device of the current route
 * lladdr:  link-layer address handed to neigh_update()
 * on_link: when non-zero the new route is installed without RTF_GATEWAY
 *          and the ISROUTER update flags are not passed to neigh_update()
 *
 * On acceptance a copy of the matched route is inserted as a
 * RTF_UP|RTF_DYNAMIC|RTF_CACHE host route via @neigh.  If the matched
 * route was itself an RTF_CACHE entry it is deleted afterwards.
 */
1027 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1028                   struct neighbour *neigh, u8 *lladdr, int on_link)
1029 {
1030         struct rt6_info *rt, *nrt;
1031
1032         /* Locate old route to this destination. */
1033         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1034
1035         if (rt == NULL)
1036                 return;
1037
             /* From here on every exit must drop the reference on rt (label out). */
1038         if (neigh->dev != rt->rt6i_dev)
1039                 goto out;
1040
1041         /*
1042          * Current route is on-link; redirect is always invalid.
1043          *
1044          * Seems, previous statement is not true. It could
1045          * be node, which looks for us as on-link (f.e. proxy ndisc)
1046          * But then router serving it might decide, that we should
1047          * know truth 8)8) --ANK (980726).
1048          */
1049         if (!(rt->rt6i_flags&RTF_GATEWAY))
1050                 goto out;
1051
1052         /*
1053          *      RFC 2461 specifies that redirects should only be
1054          *      accepted if they come from the nexthop to the target.
1055          *      Due to the way default routers are chosen, this notion
1056          *      is a bit fuzzy and one might need to check all default
1057          *      routers.
1058          */
1059         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1060                 if (rt->rt6i_flags & RTF_DEFAULT) {
1061                         struct rt6_info *rt1;
1062
1063                         read_lock(&rt6_lock);
1064                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1065                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
                                             /* Swap our reference from rt to the matching rt1. */
1066                                         dst_hold(&rt1->u.dst);
1067                                         dst_release(&rt->u.dst);
1068                                         read_unlock(&rt6_lock);
1069                                         rt = rt1;
1070                                         goto source_ok;
1071                                 }
1072                         }
1073                         read_unlock(&rt6_lock);
1074                 }
1075                 if (net_ratelimit())
1076                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1077                                "for redirect target\n");
1078                 goto out;
1079         }
1080
1081 source_ok:
1082
1083         /*
1084          *      We have finally decided to accept it.
1085          */
1086
1087         neigh_update(neigh, lladdr, NUD_STALE, 
1088                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1089                      NEIGH_UPDATE_F_OVERRIDE|
1090                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1091                                      NEIGH_UPDATE_F_ISROUTER))
1092                      );
1093
1094         /*
1095          * Redirect received -> path was valid.
1096          * Look, redirects are sent only in response to data packets,
1097          * so that this nexthop apparently is reachable. --ANK
1098          */
1099         dst_confirm(&rt->u.dst);
1100
1101         /* Duplicate redirect: silently ignore. */
1102         if (neigh == rt->u.dst.neighbour)
1103                 goto out;
1104
1105         nrt = ip6_rt_copy(rt);
1106         if (nrt == NULL)
1107                 goto out;
1108
             /* ip6_rt_copy() carried over rt's flags; replace them wholesale. */
1109         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1110         if (on_link)
1111                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1112
             /* The new entry is a host route for exactly @dest. */
1113         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1114         nrt->rt6i_dst.plen = 128;
1115         nrt->u.dst.flags |= DST_HOST;
1116
1117         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1118         nrt->rt6i_nexthop = neigh_clone(neigh);
1119         /* Reset pmtu, it may be better */
1120         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1121         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1122
             /* NOTE(review): nrt is not freed here on failure; presumably ip6_ins_rt() disposes of it - confirm. */
1123         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1124                 goto out;
1125
1126         if (rt->rt6i_flags&RTF_CACHE) {
                     /* ip6_del_rt() releases our reference on rt, so skip the out label. */
1127                 ip6_del_rt(rt, NULL, NULL, NULL);
1128                 return;
1129         }
1130
1131 out:
1132         dst_release(&rt->u.dst);
1133         return;
1134 }
1135
1136 /*
1137  *      Handle ICMP "packet too big" messages
1138  *      i.e. Path MTU discovery
1139  */
1140
/*
 * rt6_pmtu_discovery - record a smaller path MTU towards @daddr.
 *
 * daddr: destination whose path MTU is being lowered
 * saddr: source address used for the route lookup (and for the COW copy)
 * dev:   device whose ifindex restricts the lookup
 * pmtu:  reported path MTU; ignored unless it is below the route's
 *        current MTU, and raised to IPV6_MIN_MTU (with
 *        RTAX_FEATURE_ALLFRAG set) when it is below that minimum
 *
 * An RTF_CACHE route is updated in place; for any other route a new
 * entry is created with rt6_alloc_cow()/rt6_alloc_clone() and inserted.
 * In both cases the entry expires after ip6_rt_mtu_expires.
 */
1141 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1142                         struct net_device *dev, u32 pmtu)
1143 {
1144         struct rt6_info *rt, *nrt;
1145         int allfrag = 0;
1146
1147         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1148         if (rt == NULL)
1149                 return;
1150
             /* Only decreases are handled here. */
1151         if (pmtu >= dst_mtu(&rt->u.dst))
1152                 goto out;
1153
1154         if (pmtu < IPV6_MIN_MTU) {
1155                 /*
1156                  * According to RFC 2460, when a Too Big message reports
1157                  * a PMTU below the IPv6 minimum link MTU (1280), the
1158                  * PMTU is set to that minimum and a fragment header is
1159                  * included in every packet from then on.
1160                  */
1161                 pmtu = IPV6_MIN_MTU;
1162                 allfrag = 1;
1163         }
1164
1165         /* New mtu received -> path was valid.
1166            They are sent only in response to data packets,
1167            so that this nexthop apparently is reachable. --ANK
1168          */
1169         dst_confirm(&rt->u.dst);
1170
1171         /* Host route. If it is static, it would be better
1172            not to override it, but add new one, so that
1173            when cache entry will expire old pmtu
1174            would return automatically.
1175          */
1176         if (rt->rt6i_flags & RTF_CACHE) {
1177                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1178                 if (allfrag)
1179                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1180                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1181                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1182                 goto out;
1183         }
1184
1185         /* Network route.
1186            Two cases are possible:
1187            1. It is connected route. Action: COW
1188            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1189          */
1190         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1191                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1192         else
1193                 nrt = rt6_alloc_clone(rt, daddr);
1194
             /* A failed allocation is silently ignored: the PMTU is simply not recorded. */
1195         if (nrt) {
1196                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1197                 if (allfrag)
1198                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1199
1200                 /* According to RFC 1981, a PMTU increase should not be
1201                  * probed for within 5 minutes; the recommended timer is
1202                  * 10 minutes.  The route expiration time is therefore set
1203                  * to ip6_rt_mtu_expires (10 minutes): once the entry with
1204                  * the decreased pmtu expires, a larger PMTU is detected
1205                  * again automatically.
1206                  */
1206                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1207                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1208
                     /* The insertion result is not checked. */
1209                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1210         }
1211 out:
1212         dst_release(&rt->u.dst);
1213 }
1214
1215 /*
1216  *      Misc support functions
1217  */
1218
1219 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1220 {
1221         struct rt6_info *rt = ip6_dst_alloc();
1222
1223         if (rt) {
1224                 rt->u.dst.input = ort->u.dst.input;
1225                 rt->u.dst.output = ort->u.dst.output;
1226
1227                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1228                 rt->u.dst.dev = ort->u.dst.dev;
1229                 if (rt->u.dst.dev)
1230                         dev_hold(rt->u.dst.dev);
1231                 rt->rt6i_idev = ort->rt6i_idev;
1232                 if (rt->rt6i_idev)
1233                         in6_dev_hold(rt->rt6i_idev);
1234                 rt->u.dst.lastuse = jiffies;
1235                 rt->rt6i_expires = 0;
1236
1237                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1238                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1239                 rt->rt6i_metric = 0;
1240
1241                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1242 #ifdef CONFIG_IPV6_SUBTREES
1243                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1244 #endif
1245         }
1246         return rt;
1247 }
1248
1249 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1250 {       
1251         struct rt6_info *rt;
1252         struct fib6_node *fn;
1253
1254         fn = &ip6_routing_table;
1255
1256         write_lock_bh(&rt6_lock);
1257         for (rt = fn->leaf; rt; rt=rt->u.next) {
1258                 if (dev == rt->rt6i_dev &&
1259                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1260                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1261                         break;
1262         }
1263         if (rt)
1264                 dst_hold(&rt->u.dst);
1265         write_unlock_bh(&rt6_lock);
1266         return rt;
1267 }
1268
1269 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1270                                      struct net_device *dev)
1271 {
1272         struct in6_rtmsg rtmsg;
1273
1274         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1275         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1276         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1277         rtmsg.rtmsg_metric = 1024;
1278         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1279
1280         rtmsg.rtmsg_ifindex = dev->ifindex;
1281
1282         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1283         return rt6_get_dflt_router(gwaddr, dev);
1284 }
1285
1286 void rt6_purge_dflt_routers(void)
1287 {
1288         struct rt6_info *rt;
1289
1290 restart:
1291         read_lock_bh(&rt6_lock);
1292         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1293                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1294                         dst_hold(&rt->u.dst);
1295
1296                         read_unlock_bh(&rt6_lock);
1297
1298                         ip6_del_rt(rt, NULL, NULL, NULL);
1299
1300                         goto restart;
1301                 }
1302         }
1303         read_unlock_bh(&rt6_lock);
1304 }
1305
1306 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1307 {
1308         struct in6_rtmsg rtmsg;
1309         int err;
1310
1311         switch(cmd) {
1312         case SIOCADDRT:         /* Add a route */
1313         case SIOCDELRT:         /* Delete a route */
1314                 if (!capable(CAP_NET_ADMIN))
1315                         return -EPERM;
1316                 err = copy_from_user(&rtmsg, arg,
1317                                      sizeof(struct in6_rtmsg));
1318                 if (err)
1319                         return -EFAULT;
1320                         
1321                 rtnl_lock();
1322                 switch (cmd) {
1323                 case SIOCADDRT:
1324                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1325                         break;
1326                 case SIOCDELRT:
1327                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1328                         break;
1329                 default:
1330                         err = -EINVAL;
1331                 }
1332                 rtnl_unlock();
1333
1334                 return err;
1335         };
1336
1337         return -EINVAL;
1338 }
1339
1340 /*
1341  *      Drop the packet on the floor
1342  */
1343
1344 static int ip6_pkt_discard(struct sk_buff *skb)
1345 {
1346         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1347         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1348         kfree_skb(skb);
1349         return 0;
1350 }
1351
1352 static int ip6_pkt_discard_out(struct sk_buff *skb)
1353 {
1354         skb->dev = skb->dst->dev;
1355         return ip6_pkt_discard(skb);
1356 }
1357
1358 /*
1359  *      Allocate a dst for local (unicast / anycast) address.
1360  */
1361
1362 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1363                                     const struct in6_addr *addr,
1364                                     int anycast)
1365 {
1366         struct rt6_info *rt = ip6_dst_alloc();
1367
1368         if (rt == NULL)
1369                 return ERR_PTR(-ENOMEM);
1370
1371         dev_hold(&loopback_dev);
1372         in6_dev_hold(idev);
1373
1374         rt->u.dst.flags = DST_HOST;
1375         rt->u.dst.input = ip6_input;
1376         rt->u.dst.output = ip6_output;
1377         rt->rt6i_dev = &loopback_dev;
1378         rt->rt6i_idev = idev;
1379         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1380         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1381         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1382         rt->u.dst.obsolete = -1;
1383
1384         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1385         if (anycast)
1386                 rt->rt6i_flags |= RTF_ANYCAST;
1387         else
1388                 rt->rt6i_flags |= RTF_LOCAL;
1389         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1390         if (rt->rt6i_nexthop == NULL) {
1391                 dst_free((struct dst_entry *) rt);
1392                 return ERR_PTR(-ENOMEM);
1393         }
1394
1395         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1396         rt->rt6i_dst.plen = 128;
1397
1398         atomic_set(&rt->u.dst.__refcnt, 1);
1399
1400         return rt;
1401 }
1402
1403 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1404 {
1405         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1406             rt != &ip6_null_entry) {
1407                 RT6_TRACE("deleted by ifdown %p\n", rt);
1408                 return -1;
1409         }
1410         return 0;
1411 }
1412
1413 void rt6_ifdown(struct net_device *dev)
1414 {
1415         write_lock_bh(&rt6_lock);
1416         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1417         write_unlock_bh(&rt6_lock);
1418 }
1419
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
1420 struct rt6_mtu_change_arg
1421 {
1422         struct net_device *dev;      /* device whose routes are considered */
1423         unsigned mtu;                /* MTU value compared against and written to route metrics */
1424 };
1425
1426 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1427 {
1428         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1429         struct inet6_dev *idev;
1430
1431         /* In IPv6 pmtu discovery is not optional,
1432            so that RTAX_MTU lock cannot disable it.
1433            We still use this lock to block changes
1434            caused by addrconf/ndisc.
1435         */
1436
1437         idev = __in6_dev_get(arg->dev);
1438         if (idev == NULL)
1439                 return 0;
1440
1441         /* For administrative MTU increase, there is no way to discover
1442            IPv6 PMTU increase, so PMTU increase should be updated here.
1443            Since RFC 1981 doesn't include administrative MTU increase
1444            update PMTU increase is a MUST. (i.e. jumbo frame)
1445          */
1446         /*
1447            If new MTU is less than route PMTU, this new MTU will be the
1448            lowest MTU in the path, update the route PMTU to reflect PMTU
1449            decreases; if new MTU is greater than route PMTU, and the
1450            old MTU is the lowest MTU in the path, update the route PMTU
1451            to reflect the increase. In this case if the other nodes' MTU
1452            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1453            PMTU discouvery.
1454          */
1455         if (rt->rt6i_dev == arg->dev &&
1456             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1457             (dst_mtu(&rt->u.dst) > arg->mtu ||
1458              (dst_mtu(&rt->u.dst) < arg->mtu &&
1459               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1460                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1461         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1462         return 0;
1463 }
1464
1465 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1466 {
1467         struct rt6_mtu_change_arg arg;
1468
1469         arg.dev = dev;
1470         arg.mtu = mtu;
1471         read_lock_bh(&rt6_lock);
1472         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1473         read_unlock_bh(&rt6_lock);
1474 }
1475
1476 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1477                               struct in6_rtmsg *rtmsg)
1478 {
1479         memset(rtmsg, 0, sizeof(*rtmsg));
1480
1481         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1482         rtmsg->rtmsg_src_len = r->rtm_src_len;
1483         rtmsg->rtmsg_flags = RTF_UP;
1484         if (r->rtm_type == RTN_UNREACHABLE)
1485                 rtmsg->rtmsg_flags |= RTF_REJECT;
1486
1487         if (rta[RTA_GATEWAY-1]) {
1488                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1489                         return -EINVAL;
1490                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1491                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1492         }
1493         if (rta[RTA_DST-1]) {
1494                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1495                         return -EINVAL;
1496                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1497         }
1498         if (rta[RTA_SRC-1]) {
1499                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1500                         return -EINVAL;
1501                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1502         }
1503         if (rta[RTA_OIF-1]) {
1504                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1505                         return -EINVAL;
1506                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1507         }
1508         if (rta[RTA_PRIORITY-1]) {
1509                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1510                         return -EINVAL;
1511                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1512         }
1513         return 0;
1514 }
1515
1516 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1517 {
1518         struct rtmsg *r = NLMSG_DATA(nlh);
1519         struct in6_rtmsg rtmsg;
1520
1521         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1522                 return -EINVAL;
1523         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1524 }
1525
1526 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1527 {
1528         struct rtmsg *r = NLMSG_DATA(nlh);
1529         struct in6_rtmsg rtmsg;
1530
1531         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1532                 return -EINVAL;
1533         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1534 }
1535
/* Per-call dump context handed to rt6_dump_route() through the walker's args pointer. */
1536 struct rt6_rtnl_dump_arg
1537 {
1538         struct sk_buff *skb;              /* buffer the route messages are written into */
1539         struct netlink_callback *cb;      /* dump callback state; supplies the request header, pid and sequence */
1540 };
1541
/*
 * rt6_fill_node - append one netlink route message describing @rt to @skb.
 *
 * skb:    buffer to append to
 * rt:     route being described
 * dst:    if non-NULL, reported as RTA_DST with a destination length of
 *         128 instead of the route's own prefix
 * src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * iif:    if non-zero, reported as RTA_IIF; otherwise, when @dst is set,
 *         a source address from ipv6_get_saddr() is reported as
 *         RTA_PREFSRC if one is found
 * type:   netlink message type
 * pid:    netlink pid placed in the message header
 * seq:    netlink sequence number placed in the message header
 * prefix: non-zero restricts output to routes flagged RTF_PREFIX_RT
 * flags:  netlink message flags
 *
 * Returns 1 when the route was skipped by the @prefix filter, skb->len
 * after a message was appended, or -1 after trimming @skb back to its
 * original tail when the message could not be completed.
 */
1542 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1543                          struct in6_addr *dst, struct in6_addr *src,
1544                          int iif, int type, u32 pid, u32 seq,
1545                          int prefix, unsigned int flags)
1546 {
1547         struct rtmsg *rtm;
1548         struct nlmsghdr  *nlh;
             /* Remember where the message starts so it can be sized or trimmed later. */
1549         unsigned char    *b = skb->tail;
1550         struct rta_cacheinfo ci;
1551
1552         if (prefix) {   /* user wants prefix routes only */
1553                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1554                         /* success since this is not a prefix route */
1555                         return 1;
1556                 }
1557         }
1558
             /* NOTE(review): presumably NLMSG_NEW() jumps to nlmsg_failure when skb has no room - confirm in the macro. */
1559         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1560         rtm = NLMSG_DATA(nlh);
1561         rtm->rtm_family = AF_INET6;
1562         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1563         rtm->rtm_src_len = rt->rt6i_src.plen;
1564         rtm->rtm_tos = 0;
1565         rtm->rtm_table = RT_TABLE_MAIN;
1566         if (rt->rt6i_flags&RTF_REJECT)
1567                 rtm->rtm_type = RTN_UNREACHABLE;
1568         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1569                 rtm->rtm_type = RTN_LOCAL;
1570         else
1571                 rtm->rtm_type = RTN_UNICAST;
1572         rtm->rtm_flags = 0;
1573         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
             /* Start from the stored protocol, then let the route flags override it. */
1574         rtm->rtm_protocol = rt->rt6i_protocol;
1575         if (rt->rt6i_flags&RTF_DYNAMIC)
1576                 rtm->rtm_protocol = RTPROT_REDIRECT;
1577         else if (rt->rt6i_flags & RTF_ADDRCONF)
1578                 rtm->rtm_protocol = RTPROT_KERNEL;
1579         else if (rt->rt6i_flags&RTF_DEFAULT)
1580                 rtm->rtm_protocol = RTPROT_RA;
1581
1582         if (rt->rt6i_flags&RTF_CACHE)
1583                 rtm->rtm_flags |= RTM_F_CLONED;
1584
             /* NOTE(review): RTA_PUT() presumably jumps to rtattr_failure when the attribute does not fit - confirm. */
1585         if (dst) {
1586                 RTA_PUT(skb, RTA_DST, 16, dst);
1587                 rtm->rtm_dst_len = 128;
1588         } else if (rtm->rtm_dst_len)
1589                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1590 #ifdef CONFIG_IPV6_SUBTREES
1591         if (src) {
1592                 RTA_PUT(skb, RTA_SRC, 16, src);
1593                 rtm->rtm_src_len = 128;
1594         } else if (rtm->rtm_src_len)
1595                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1596 #endif
1597         if (iif)
1598                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1599         else if (dst) {
1600                 struct in6_addr saddr_buf;
1601                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1602                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1603         }
1604         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1605                 goto rtattr_failure;
1606         if (rt->u.dst.neighbour)
1607                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1608         if (rt->u.dst.dev)
1609                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1610         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
             /* Cache info: times are reported relative to now, in clock_t units. */
1611         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1612         if (rt->rt6i_expires)
1613                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1614         else
1615                 ci.rta_expires = 0;
1616         ci.rta_used = rt->u.dst.__use;
1617         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1618         ci.rta_error = rt->u.dst.error;
1619         ci.rta_id = 0;
1620         ci.rta_ts = 0;
1621         ci.rta_tsage = 0;
1622         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
             /* Final message length is everything appended since b. */
1623         nlh->nlmsg_len = skb->tail - b;
1624         return skb->len;
1625
1626 nlmsg_failure:
1627 rtattr_failure:
             /* Undo the partial message so skb ends where it did on entry. */
1628         skb_trim(skb, b - skb->data);
1629         return -1;
1630 }
1631
1632 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1633 {
1634         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1635         int prefix;
1636
1637         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1638                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1639                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1640         } else
1641                 prefix = 0;
1642
1643         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1644                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1645                      prefix, NLM_F_MULTI);
1646 }
1647
1648 static int fib6_dump_node(struct fib6_walker_t *w)
1649 {
1650         int res;
1651         struct rt6_info *rt;
1652
1653         for (rt = w->leaf; rt; rt = rt->u.next) {
1654                 res = rt6_dump_route(rt, w->args);
1655                 if (res < 0) {
1656                         /* Frame is full, suspend walking */
1657                         w->leaf = rt;
1658                         return 1;
1659                 }
1660                 BUG_TRAP(res!=0);
1661         }
1662         w->leaf = NULL;
1663         return 0;
1664 }
1665
1666 static void fib6_dump_end(struct netlink_callback *cb)
1667 {
1668         struct fib6_walker_t *w = (void*)cb->args[0];
1669
1670         if (w) {
1671                 cb->args[0] = 0;
1672                 fib6_walker_unlink(w);
1673                 kfree(w);
1674         }
1675         cb->done = (void*)cb->args[1];
1676         cb->args[1] = 0;
1677 }
1678
1679 static int fib6_dump_done(struct netlink_callback *cb)
1680 {
1681         fib6_dump_end(cb);
1682         return cb->done ? cb->done(cb) : 0;
1683 }
1684
/*
 * inet6_dump_fib - netlink dump callback for the IPv6 routing table.
 *
 * skb: buffer to fill with route messages
 * cb:  dump state; args[0] holds the tree walker between calls and
 *      args[1] holds the caller's original done callback while
 *      fib6_dump_done is installed in its place
 *
 * The first call allocates and starts a walker; later calls resume it.
 * Returns -ENOMEM if the walker cannot be allocated, a negative walk
 * result, or otherwise skb->len.  When the returned value is <= 0 the
 * walker is destroyed via fib6_dump_end().
 */
1685 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1686 {
1687         struct rt6_rtnl_dump_arg arg;
1688         struct fib6_walker_t *w;
1689         int res;
1690
1691         arg.skb = skb;
1692         arg.cb = cb;
1693
1694         w = (void*)cb->args[0];
1695         if (w == NULL) {
1696                 /* New dump:
1697                  *
1698                  * 1. hook callback destructor.
1699                  */
1700                 cb->args[1] = (long)cb->done;
1701                 cb->done = fib6_dump_done;
1702
1703                 /*
1704                  * 2. allocate and initialize walker.
1705                  */
1706                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1707                 if (w == NULL)
1708                         return -ENOMEM;
1709                 RT6_TRACE("dump<%p", w);
1710                 memset(w, 0, sizeof(*w));
1711                 w->root = &ip6_routing_table;
1712                 w->func = fib6_dump_node;
1713                 w->args = &arg;
1714                 cb->args[0] = (long)w;
1715                 read_lock_bh(&rt6_lock);
1716                 res = fib6_walk(w);
1717                 read_unlock_bh(&rt6_lock);
1718         } else {
                     /* arg lives on this call's stack, so the walker must be re-pointed at it on every resume. */
1719                 w->args = &arg;
1720                 read_lock_bh(&rt6_lock);
1721                 res = fib6_walk_continue(w);
1722                 read_unlock_bh(&rt6_lock);
1723         }
1724 #if RT6_DEBUG >= 3
1725         if (res <= 0 && skb->len == 0)
1726                 RT6_TRACE("%p>dump end\n", w);
1727 #endif
1728         res = res < 0 ? res : skb->len;
1729         /* res < 0 is an error. (really, impossible)
1730            res == 0 means that dump is complete, but skb still can contain data.
1731            res > 0 dump is not complete, but frame is full.
1732          */
1733         /* Destroy walker, if dump of this table is complete. */
1734         if (res <= 0)
1735                 fib6_dump_end(cb);
1736         return res;
1737 }
1738
1739 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1740 {
1741         struct rtattr **rta = arg;
1742         int iif = 0;
1743         int err = -ENOBUFS;
1744         struct sk_buff *skb;
1745         struct flowi fl;
1746         struct rt6_info *rt;
1747
1748         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1749         if (skb == NULL)
1750                 goto out;
1751
1752         /* Reserve room for dummy headers, this skb can pass
1753            through good chunk of routing engine.
1754          */
1755         skb->mac.raw = skb->data;
1756         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1757
1758         memset(&fl, 0, sizeof(fl));
1759         if (rta[RTA_SRC-1])
1760                 ipv6_addr_copy(&fl.fl6_src,
1761                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1762         if (rta[RTA_DST-1])
1763                 ipv6_addr_copy(&fl.fl6_dst,
1764                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1765
1766         if (rta[RTA_IIF-1])
1767                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1768
1769         if (iif) {
1770                 struct net_device *dev;
1771                 dev = __dev_get_by_index(iif);
1772                 if (!dev) {
1773                         err = -ENODEV;
1774                         goto out_free;
1775                 }
1776         }
1777
1778         fl.oif = 0;
1779         if (rta[RTA_OIF-1])
1780                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1781
1782         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1783
1784         skb->dst = &rt->u.dst;
1785
1786         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1787         err = rt6_fill_node(skb, rt, 
1788                             &fl.fl6_dst, &fl.fl6_src,
1789                             iif,
1790                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1791                             nlh->nlmsg_seq, 0, 0);
1792         if (err < 0) {
1793                 err = -EMSGSIZE;
1794                 goto out_free;
1795         }
1796
1797         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1798         if (err > 0)
1799                 err = 0;
1800 out:
1801         return err;
1802 out_free:
1803         kfree_skb(skb);
1804         goto out;       
1805 }
1806
1807 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1808                         struct netlink_skb_parms *req)
1809 {
1810         struct sk_buff *skb;
1811         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1812         u32 pid = current->pid;
1813         u32 seq = 0;
1814
1815         if (req)
1816                 pid = req->pid;
1817         if (nlh)
1818                 seq = nlh->nlmsg_seq;
1819         
1820         skb = alloc_skb(size, gfp_any());
1821         if (!skb) {
1822                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1823                 return;
1824         }
1825         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1826                 kfree_skb(skb);
1827                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1828                 return;
1829         }
1830         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1831         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1832 }
1833
1834 /*
1835  *      /proc
1836  */
1837
1838 #ifdef CONFIG_PROC_FS
1839
/*
 * Size in bytes used for one route record when translating a read
 * offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State carried through the tree walk that formats the route listing. */
struct rt6_proc_arg {
	char *buffer;	/* where formatted records are written */
	int offset;	/* read offset requested by the caller */
	int length;	/* formatting stops once len reaches this */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1850
1851 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1852 {
1853         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1854         int i;
1855
1856         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1857                 arg->skip++;
1858                 return 0;
1859         }
1860
1861         if (arg->len >= arg->length)
1862                 return 0;
1863
1864         for (i=0; i<16; i++) {
1865                 sprintf(arg->buffer + arg->len, "%02x",
1866                         rt->rt6i_dst.addr.s6_addr[i]);
1867                 arg->len += 2;
1868         }
1869         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1870                             rt->rt6i_dst.plen);
1871
1872 #ifdef CONFIG_IPV6_SUBTREES
1873         for (i=0; i<16; i++) {
1874                 sprintf(arg->buffer + arg->len, "%02x",
1875                         rt->rt6i_src.addr.s6_addr[i]);
1876                 arg->len += 2;
1877         }
1878         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1879                             rt->rt6i_src.plen);
1880 #else
1881         sprintf(arg->buffer + arg->len,
1882                 "00000000000000000000000000000000 00 ");
1883         arg->len += 36;
1884 #endif
1885
1886         if (rt->rt6i_nexthop) {
1887                 for (i=0; i<16; i++) {
1888                         sprintf(arg->buffer + arg->len, "%02x",
1889                                 rt->rt6i_nexthop->primary_key[i]);
1890                         arg->len += 2;
1891                 }
1892         } else {
1893                 sprintf(arg->buffer + arg->len,
1894                         "00000000000000000000000000000000");
1895                 arg->len += 32;
1896         }
1897         arg->len += sprintf(arg->buffer + arg->len,
1898                             " %08x %08x %08x %08x %8s\n",
1899                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1900                             rt->u.dst.__use, rt->rt6i_flags, 
1901                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1902         return 0;
1903 }
1904
1905 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1906 {
1907         struct rt6_proc_arg arg;
1908         arg.buffer = buffer;
1909         arg.offset = offset;
1910         arg.length = length;
1911         arg.skip = 0;
1912         arg.len = 0;
1913
1914         read_lock_bh(&rt6_lock);
1915         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1916         read_unlock_bh(&rt6_lock);
1917
1918         *start = buffer;
1919         if (offset)
1920                 *start += offset % RT6_INFO_LEN;
1921
1922         arg.len -= offset % RT6_INFO_LEN;
1923
1924         if (arg.len > length)
1925                 arg.len = length;
1926         if (arg.len < 0)
1927                 arg.len = 0;
1928
1929         return arg.len;
1930 }
1931
1932 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1933 {
1934         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1935                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1936                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1937                       rt6_stats.fib_rt_cache,
1938                       atomic_read(&ip6_dst_ops.entries),
1939                       rt6_stats.fib_discarded_routes);
1940
1941         return 0;
1942 }
1943
1944 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1945 {
1946         return single_open(file, rt6_stats_seq_show, NULL);
1947 }
1948
1949 static struct file_operations rt6_stats_seq_fops = {
1950         .owner   = THIS_MODULE,
1951         .open    = rt6_stats_seq_open,
1952         .read    = seq_read,
1953         .llseek  = seq_lseek,
1954         .release = single_release,
1955 };
1956 #endif  /* CONFIG_PROC_FS */
1957
1958 #ifdef CONFIG_SYSCTL
1959
1960 static int flush_delay;
1961
1962 static
1963 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1964                               void __user *buffer, size_t *lenp, loff_t *ppos)
1965 {
1966         if (write) {
1967                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1968                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1969                 return 0;
1970         } else
1971                 return -EINVAL;
1972 }
1973
1974 ctl_table ipv6_route_table[] = {
1975         {
1976                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1977                 .procname       =       "flush",
1978                 .data           =       &flush_delay,
1979                 .maxlen         =       sizeof(int),
1980                 .mode           =       0200,
1981                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1982         },
1983         {
1984                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1985                 .procname       =       "gc_thresh",
1986                 .data           =       &ip6_dst_ops.gc_thresh,
1987                 .maxlen         =       sizeof(int),
1988                 .mode           =       0644,
1989                 .proc_handler   =       &proc_dointvec,
1990         },
1991         {
1992                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1993                 .procname       =       "max_size",
1994                 .data           =       &ip6_rt_max_size,
1995                 .maxlen         =       sizeof(int),
1996                 .mode           =       0644,
1997                 .proc_handler   =       &proc_dointvec,
1998         },
1999         {
2000                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2001                 .procname       =       "gc_min_interval",
2002                 .data           =       &ip6_rt_gc_min_interval,
2003                 .maxlen         =       sizeof(int),
2004                 .mode           =       0644,
2005                 .proc_handler   =       &proc_dointvec_jiffies,
2006                 .strategy       =       &sysctl_jiffies,
2007         },
2008         {
2009                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2010                 .procname       =       "gc_timeout",
2011                 .data           =       &ip6_rt_gc_timeout,
2012                 .maxlen         =       sizeof(int),
2013                 .mode           =       0644,
2014                 .proc_handler   =       &proc_dointvec_jiffies,
2015                 .strategy       =       &sysctl_jiffies,
2016         },
2017         {
2018                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2019                 .procname       =       "gc_interval",
2020                 .data           =       &ip6_rt_gc_interval,
2021                 .maxlen         =       sizeof(int),
2022                 .mode           =       0644,
2023                 .proc_handler   =       &proc_dointvec_jiffies,
2024                 .strategy       =       &sysctl_jiffies,
2025         },
2026         {
2027                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2028                 .procname       =       "gc_elasticity",
2029                 .data           =       &ip6_rt_gc_elasticity,
2030                 .maxlen         =       sizeof(int),
2031                 .mode           =       0644,
2032                 .proc_handler   =       &proc_dointvec_jiffies,
2033                 .strategy       =       &sysctl_jiffies,
2034         },
2035         {
2036                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2037                 .procname       =       "mtu_expires",
2038                 .data           =       &ip6_rt_mtu_expires,
2039                 .maxlen         =       sizeof(int),
2040                 .mode           =       0644,
2041                 .proc_handler   =       &proc_dointvec_jiffies,
2042                 .strategy       =       &sysctl_jiffies,
2043         },
2044         {
2045                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2046                 .procname       =       "min_adv_mss",
2047                 .data           =       &ip6_rt_min_advmss,
2048                 .maxlen         =       sizeof(int),
2049                 .mode           =       0644,
2050                 .proc_handler   =       &proc_dointvec_jiffies,
2051                 .strategy       =       &sysctl_jiffies,
2052         },
2053         {
2054                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2055                 .procname       =       "gc_min_interval_ms",
2056                 .data           =       &ip6_rt_gc_min_interval,
2057                 .maxlen         =       sizeof(int),
2058                 .mode           =       0644,
2059                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2060                 .strategy       =       &sysctl_ms_jiffies,
2061         },
2062         { .ctl_name = 0 }
2063 };
2064
2065 #endif
2066
2067 void __init ip6_route_init(void)
2068 {
2069         struct proc_dir_entry *p;
2070
2071         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2072                                                      sizeof(struct rt6_info),
2073                                                      0, SLAB_HWCACHE_ALIGN,
2074                                                      NULL, NULL);
2075         if (!ip6_dst_ops.kmem_cachep)
2076                 panic("cannot create ip6_dst_cache");
2077
2078         fib6_init();
2079 #ifdef  CONFIG_PROC_FS
2080         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2081         if (p)
2082                 p->owner = THIS_MODULE;
2083
2084         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2085 #endif
2086 #ifdef CONFIG_XFRM
2087         xfrm6_init();
2088 #endif
2089 }
2090
2091 void ip6_route_cleanup(void)
2092 {
2093 #ifdef CONFIG_PROC_FS
2094         proc_net_remove("ipv6_route");
2095         proc_net_remove("rt6_stats");
2096 #endif
2097 #ifdef CONFIG_XFRM
2098         xfrm6_fini();
2099 #endif
2100         rt6_ifdown(NULL);
2101         fib6_gc_cleanup();
2102         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2103 }