]> err.no Git - linux-2.6/blob - net/ipv6/route.c
[IPV6]: ROUTE: Handle finding the next best route in reachability in BACKTRACK().
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/* Debug verbosity.  Values of 3 and above turn the macros below into printk calls. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing compiled out. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already carry a nexthop (or are RTF_NONEXTHOP) through rt6_alloc_clone();
 * when zero such routes are used directly.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Bits OR-ed into the "strict" argument of rt6_select()/rt6_score_route(). */
#define RT6_SELECT_F_IFACE      0x1	/* reject routes that fail the device check */
#define RT6_SELECT_F_REACHABLE  0x2	/* reject routes whose neighbour is not NUD_VALID */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
/*
 * Protects the whole IPv6 FIB.  Lookups in this file take it for reading;
 * ip6_ins_rt() and ip6_del_rt() take it for writing.
 */

DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254         if (rt6_check_neigh(rt))
255                 m |= 4;
256         else if (strict & RT6_SELECT_F_REACHABLE)
257                 return -1;
258         return m;
259 }
260
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262                                    int strict)
263 {
264         struct rt6_info *match = NULL, *last = NULL;
265         struct rt6_info *rt, *rt0 = *head;
266         u32 metric;
267         int mpri = -1;
268
269         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270                   __FUNCTION__, head, head ? *head : NULL, oif);
271
272         for (rt = rt0, metric = rt0->rt6i_metric;
273              rt && rt->rt6i_metric == metric;
274              rt = rt->u.next) {
275                 int m;
276
277                 if (rt6_check_expired(rt))
278                         continue;
279
280                 last = rt;
281
282                 m = rt6_score_route(rt, oif, strict);
283                 if (m < 0)
284                         continue;
285
286                 if (m > mpri) {
287                         match = rt;
288                         mpri = m;
289                 }
290         }
291
292         if (!match &&
293             (strict & RT6_SELECT_F_REACHABLE) &&
294             last && last != rt0) {
295                 /* no entries matched; do round-robin */
296                 *head = rt0->u.next;
297                 rt0->u.next = last->u.next;
298                 last->u.next = rt0;
299         }
300
301         RT6_TRACE("%s() => %p, score=%d\n",
302                   __FUNCTION__, match, mpri);
303
304         return (match ? match : &ip6_null_entry);
305 }
306
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308                             int oif, int strict)
309 {
310         struct fib6_node *fn;
311         struct rt6_info *rt;
312
313         read_lock_bh(&rt6_lock);
314         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315         rt = rt6_device_match(fn->leaf, oif, strict);
316         dst_hold(&rt->u.dst);
317         rt->u.dst.__use++;
318         read_unlock_bh(&rt6_lock);
319
320         rt->u.dst.lastuse = jiffies;
321         if (rt->u.dst.error == 0)
322                 return rt;
323         dst_release(&rt->u.dst);
324         return NULL;
325 }
326
327 /* ip6_ins_rt is called with FREE rt6_lock.
328    It takes new route entry, the addition fails by any reason the
329    route is freed. In any case, if caller does not hold it, it may
330    be destroyed.
331  */
332
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334                 void *_rtattr, struct netlink_skb_parms *req)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346                                       struct in6_addr *saddr)
347 {
348         struct rt6_info *rt;
349
350         /*
351          *      Clone the route.
352          */
353
354         rt = ip6_rt_copy(ort);
355
356         if (rt) {
357                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358                         if (rt->rt6i_dst.plen != 128 &&
359                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360                                 rt->rt6i_flags |= RTF_ANYCAST;
361                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
362                 }
363
364                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365                 rt->rt6i_dst.plen = 128;
366                 rt->rt6i_flags |= RTF_CACHE;
367                 rt->u.dst.flags |= DST_HOST;
368
369 #ifdef CONFIG_IPV6_SUBTREES
370                 if (rt->rt6i_src.plen && saddr) {
371                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372                         rt->rt6i_src.plen = 128;
373                 }
374 #endif
375
376                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
378         }
379
380         return rt;
381 }
382
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384 {
385         struct rt6_info *rt = ip6_rt_copy(ort);
386         if (rt) {
387                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388                 rt->rt6i_dst.plen = 128;
389                 rt->rt6i_flags |= RTF_CACHE;
390                 if (rt->rt6i_flags & RTF_REJECT)
391                         rt->u.dst.error = ort->u.dst.error;
392                 rt->u.dst.flags |= DST_HOST;
393                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394         }
395         return rt;
396 }
397
/*
 * Helper for ip6_route_input()/ip6_route_output().
 *
 * Expects the locals "rt" and "fn" and the labels "out" and "restart"
 * to exist at the expansion site.  When the selection produced the null
 * entry, walk up the FIB tree: stop at the root (goto out) or retry the
 * selection at the first ancestor carrying route info (goto restart).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
408
409
410 void ip6_route_input(struct sk_buff *skb)
411 {
412         struct fib6_node *fn;
413         struct rt6_info *rt, *nrt;
414         int strict;
415         int attempts = 3;
416         int err;
417         int reachable = RT6_SELECT_F_REACHABLE;
418
419         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
420
421 relookup:
422         read_lock_bh(&rt6_lock);
423
424 restart_2:
425         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
426                          &skb->nh.ipv6h->saddr);
427
428 restart:
429         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
430         BACKTRACK();
431         if (rt == &ip6_null_entry ||
432             rt->rt6i_flags & RTF_CACHE)
433                 goto out;
434
435         dst_hold(&rt->u.dst);
436         read_unlock_bh(&rt6_lock);
437
438         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
439                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
440         else {
441 #if CLONE_OFFLINK_ROUTE
442                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
443 #else
444                 goto out2;
445 #endif
446         }
447
448         dst_release(&rt->u.dst);
449         rt = nrt ? : &ip6_null_entry;
450
451         dst_hold(&rt->u.dst);
452         if (nrt) {
453                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
454                 if (!err)
455                         goto out2;
456         }
457
458         if (--attempts <= 0)
459                 goto out2;
460
461         /*
462          * Race condition! In the gap, when rt6_lock was
463          * released someone could insert this route.  Relookup.
464          */
465         dst_release(&rt->u.dst);
466         goto relookup;
467
468 out:
469         if (reachable) {
470                 reachable = 0;
471                 goto restart_2;
472         }
473         dst_hold(&rt->u.dst);
474         read_unlock_bh(&rt6_lock);
475 out2:
476         rt->u.dst.lastuse = jiffies;
477         rt->u.dst.__use++;
478         skb->dst = (struct dst_entry *) rt;
479         return;
480 }
481
482 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
483 {
484         struct fib6_node *fn;
485         struct rt6_info *rt, *nrt;
486         int strict;
487         int attempts = 3;
488         int err;
489         int reachable = RT6_SELECT_F_REACHABLE;
490
491         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
492
493 relookup:
494         read_lock_bh(&rt6_lock);
495
496 restart_2:
497         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
498
499 restart:
500         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
501         BACKTRACK();
502         if (rt == &ip6_null_entry ||
503             rt->rt6i_flags & RTF_CACHE)
504                 goto out;
505
506         dst_hold(&rt->u.dst);
507         read_unlock_bh(&rt6_lock);
508
509         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
510                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
511         else {
512 #if CLONE_OFFLINK_ROUTE
513                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
514 #else
515                 goto out2;
516 #endif
517         }
518
519         dst_release(&rt->u.dst);
520         rt = nrt ? : &ip6_null_entry;
521
522         dst_hold(&rt->u.dst);
523         if (nrt) {
524                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
525                 if (!err)
526                         goto out2;
527         }
528
529         if (--attempts <= 0)
530                 goto out2;
531
532         /*
533          * Race condition! In the gap, when rt6_lock was
534          * released someone could insert this route.  Relookup.
535          */
536         dst_release(&rt->u.dst);
537         goto relookup;
538
539 out:
540         if (reachable) {
541                 reachable = 0;
542                 goto restart_2;
543         }
544         dst_hold(&rt->u.dst);
545         read_unlock_bh(&rt6_lock);
546 out2:
547         rt->u.dst.lastuse = jiffies;
548         rt->u.dst.__use++;
549         return &rt->u.dst;
550 }
551
552
553 /*
554  *      Destination cache support functions
555  */
556
557 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
558 {
559         struct rt6_info *rt;
560
561         rt = (struct rt6_info *) dst;
562
563         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
564                 return dst;
565
566         return NULL;
567 }
568
569 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
570 {
571         struct rt6_info *rt = (struct rt6_info *) dst;
572
573         if (rt) {
574                 if (rt->rt6i_flags & RTF_CACHE)
575                         ip6_del_rt(rt, NULL, NULL, NULL);
576                 else
577                         dst_release(dst);
578         }
579         return NULL;
580 }
581
582 static void ip6_link_failure(struct sk_buff *skb)
583 {
584         struct rt6_info *rt;
585
586         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
587
588         rt = (struct rt6_info *) skb->dst;
589         if (rt) {
590                 if (rt->rt6i_flags&RTF_CACHE) {
591                         dst_set_expires(&rt->u.dst, 0);
592                         rt->rt6i_flags |= RTF_EXPIRES;
593                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
594                         rt->rt6i_node->fn_sernum = -1;
595         }
596 }
597
598 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
599 {
600         struct rt6_info *rt6 = (struct rt6_info*)dst;
601
602         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
603                 rt6->rt6i_flags |= RTF_MODIFIED;
604                 if (mtu < IPV6_MIN_MTU) {
605                         mtu = IPV6_MIN_MTU;
606                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
607                 }
608                 dst->metrics[RTAX_MTU-1] = mtu;
609         }
610 }
611
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
615
616 static inline unsigned int ipv6_advmss(unsigned int mtu)
617 {
618         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
619
620         if (mtu < ip6_rt_min_advmss)
621                 mtu = ip6_rt_min_advmss;
622
623         /*
624          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
625          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
626          * IPV6_MAXPLEN is also valid and means: "any MSS, 
627          * rely only on pmtu discovery"
628          */
629         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
630                 mtu = IPV6_MAXPLEN;
631         return mtu;
632 }
633
634 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
635                                   struct neighbour *neigh,
636                                   struct in6_addr *addr,
637                                   int (*output)(struct sk_buff *))
638 {
639         struct rt6_info *rt;
640         struct inet6_dev *idev = in6_dev_get(dev);
641
642         if (unlikely(idev == NULL))
643                 return NULL;
644
645         rt = ip6_dst_alloc();
646         if (unlikely(rt == NULL)) {
647                 in6_dev_put(idev);
648                 goto out;
649         }
650
651         dev_hold(dev);
652         if (neigh)
653                 neigh_hold(neigh);
654         else
655                 neigh = ndisc_get_neigh(dev, addr);
656
657         rt->rt6i_dev      = dev;
658         rt->rt6i_idev     = idev;
659         rt->rt6i_nexthop  = neigh;
660         atomic_set(&rt->u.dst.__refcnt, 1);
661         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
662         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
663         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
664         rt->u.dst.output  = output;
665
666 #if 0   /* there's no chance to use these for ndisc */
667         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
668                                 ? DST_HOST 
669                                 : 0;
670         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
671         rt->rt6i_dst.plen = 128;
672 #endif
673
674         write_lock_bh(&rt6_lock);
675         rt->u.dst.next = ndisc_dst_gc_list;
676         ndisc_dst_gc_list = &rt->u.dst;
677         write_unlock_bh(&rt6_lock);
678
679         fib6_force_start_gc();
680
681 out:
682         return (struct dst_entry *)rt;
683 }
684
685 int ndisc_dst_gc(int *more)
686 {
687         struct dst_entry *dst, *next, **pprev;
688         int freed;
689
690         next = NULL;
691         pprev = &ndisc_dst_gc_list;
692         freed = 0;
693         while ((dst = *pprev) != NULL) {
694                 if (!atomic_read(&dst->__refcnt)) {
695                         *pprev = dst->next;
696                         dst_free(dst);
697                         freed++;
698                 } else {
699                         pprev = &dst->next;
700                         (*more)++;
701                 }
702         }
703
704         return freed;
705 }
706
707 static int ip6_dst_gc(void)
708 {
709         static unsigned expire = 30*HZ;
710         static unsigned long last_gc;
711         unsigned long now = jiffies;
712
713         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
714             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
715                 goto out;
716
717         expire++;
718         fib6_run_gc(expire);
719         last_gc = now;
720         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
721                 expire = ip6_rt_gc_timeout>>1;
722
723 out:
724         expire -= expire>>ip6_rt_gc_elasticity;
725         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
726 }
727
728 /* Clean host part of a prefix. Not necessary in radix tree,
729    but results in cleaner routing tables.
730
731    Remove it only when all the things will work!
732  */
733
734 static int ipv6_get_mtu(struct net_device *dev)
735 {
736         int mtu = IPV6_MIN_MTU;
737         struct inet6_dev *idev;
738
739         idev = in6_dev_get(dev);
740         if (idev) {
741                 mtu = idev->cnf.mtu6;
742                 in6_dev_put(idev);
743         }
744         return mtu;
745 }
746
747 int ipv6_get_hoplimit(struct net_device *dev)
748 {
749         int hoplimit = ipv6_devconf.hop_limit;
750         struct inet6_dev *idev;
751
752         idev = in6_dev_get(dev);
753         if (idev) {
754                 hoplimit = idev->cnf.hop_limit;
755                 in6_dev_put(idev);
756         }
757         return hoplimit;
758 }
759
760 /*
761  *
762  */
763
764 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
765                 void *_rtattr, struct netlink_skb_parms *req)
766 {
767         int err;
768         struct rtmsg *r;
769         struct rtattr **rta;
770         struct rt6_info *rt = NULL;
771         struct net_device *dev = NULL;
772         struct inet6_dev *idev = NULL;
773         int addr_type;
774
775         rta = (struct rtattr **) _rtattr;
776
777         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
778                 return -EINVAL;
779 #ifndef CONFIG_IPV6_SUBTREES
780         if (rtmsg->rtmsg_src_len)
781                 return -EINVAL;
782 #endif
783         if (rtmsg->rtmsg_ifindex) {
784                 err = -ENODEV;
785                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
786                 if (!dev)
787                         goto out;
788                 idev = in6_dev_get(dev);
789                 if (!idev)
790                         goto out;
791         }
792
793         if (rtmsg->rtmsg_metric == 0)
794                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
795
796         rt = ip6_dst_alloc();
797
798         if (rt == NULL) {
799                 err = -ENOMEM;
800                 goto out;
801         }
802
803         rt->u.dst.obsolete = -1;
804         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
805         if (nlh && (r = NLMSG_DATA(nlh))) {
806                 rt->rt6i_protocol = r->rtm_protocol;
807         } else {
808                 rt->rt6i_protocol = RTPROT_BOOT;
809         }
810
811         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
812
813         if (addr_type & IPV6_ADDR_MULTICAST)
814                 rt->u.dst.input = ip6_mc_input;
815         else
816                 rt->u.dst.input = ip6_forward;
817
818         rt->u.dst.output = ip6_output;
819
820         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
821                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
822         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
823         if (rt->rt6i_dst.plen == 128)
824                rt->u.dst.flags = DST_HOST;
825
826 #ifdef CONFIG_IPV6_SUBTREES
827         ipv6_addr_prefix(&rt->rt6i_src.addr, 
828                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
829         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
830 #endif
831
832         rt->rt6i_metric = rtmsg->rtmsg_metric;
833
834         /* We cannot add true routes via loopback here,
835            they would result in kernel looping; promote them to reject routes
836          */
837         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
838             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
839                 /* hold loopback dev/idev if we haven't done so. */
840                 if (dev != &loopback_dev) {
841                         if (dev) {
842                                 dev_put(dev);
843                                 in6_dev_put(idev);
844                         }
845                         dev = &loopback_dev;
846                         dev_hold(dev);
847                         idev = in6_dev_get(dev);
848                         if (!idev) {
849                                 err = -ENODEV;
850                                 goto out;
851                         }
852                 }
853                 rt->u.dst.output = ip6_pkt_discard_out;
854                 rt->u.dst.input = ip6_pkt_discard;
855                 rt->u.dst.error = -ENETUNREACH;
856                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
857                 goto install_route;
858         }
859
860         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
861                 struct in6_addr *gw_addr;
862                 int gwa_type;
863
864                 gw_addr = &rtmsg->rtmsg_gateway;
865                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
866                 gwa_type = ipv6_addr_type(gw_addr);
867
868                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
869                         struct rt6_info *grt;
870
871                         /* IPv6 strictly inhibits using not link-local
872                            addresses as nexthop address.
873                            Otherwise, router will not able to send redirects.
874                            It is very good, but in some (rare!) circumstances
875                            (SIT, PtP, NBMA NOARP links) it is handy to allow
876                            some exceptions. --ANK
877                          */
878                         err = -EINVAL;
879                         if (!(gwa_type&IPV6_ADDR_UNICAST))
880                                 goto out;
881
882                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
883
884                         err = -EHOSTUNREACH;
885                         if (grt == NULL)
886                                 goto out;
887                         if (dev) {
888                                 if (dev != grt->rt6i_dev) {
889                                         dst_release(&grt->u.dst);
890                                         goto out;
891                                 }
892                         } else {
893                                 dev = grt->rt6i_dev;
894                                 idev = grt->rt6i_idev;
895                                 dev_hold(dev);
896                                 in6_dev_hold(grt->rt6i_idev);
897                         }
898                         if (!(grt->rt6i_flags&RTF_GATEWAY))
899                                 err = 0;
900                         dst_release(&grt->u.dst);
901
902                         if (err)
903                                 goto out;
904                 }
905                 err = -EINVAL;
906                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
907                         goto out;
908         }
909
910         err = -ENODEV;
911         if (dev == NULL)
912                 goto out;
913
914         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
915                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
916                 if (IS_ERR(rt->rt6i_nexthop)) {
917                         err = PTR_ERR(rt->rt6i_nexthop);
918                         rt->rt6i_nexthop = NULL;
919                         goto out;
920                 }
921         }
922
923         rt->rt6i_flags = rtmsg->rtmsg_flags;
924
925 install_route:
926         if (rta && rta[RTA_METRICS-1]) {
927                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
928                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
929
930                 while (RTA_OK(attr, attrlen)) {
931                         unsigned flavor = attr->rta_type;
932                         if (flavor) {
933                                 if (flavor > RTAX_MAX) {
934                                         err = -EINVAL;
935                                         goto out;
936                                 }
937                                 rt->u.dst.metrics[flavor-1] =
938                                         *(u32 *)RTA_DATA(attr);
939                         }
940                         attr = RTA_NEXT(attr, attrlen);
941                 }
942         }
943
944         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
945                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
946         if (!rt->u.dst.metrics[RTAX_MTU-1])
947                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
948         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
949                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
950         rt->u.dst.dev = dev;
951         rt->rt6i_idev = idev;
952         return ip6_ins_rt(rt, nlh, _rtattr, req);
953
954 out:
955         if (dev)
956                 dev_put(dev);
957         if (idev)
958                 in6_dev_put(idev);
959         if (rt)
960                 dst_free((struct dst_entry *) rt);
961         return err;
962 }
963
964 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
965 {
966         int err;
967
968         write_lock_bh(&rt6_lock);
969
970         err = fib6_del(rt, nlh, _rtattr, req);
971         dst_release(&rt->u.dst);
972
973         write_unlock_bh(&rt6_lock);
974
975         return err;
976 }
977
978 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
979 {
980         struct fib6_node *fn;
981         struct rt6_info *rt;
982         int err = -ESRCH;
983
984         read_lock_bh(&rt6_lock);
985
986         fn = fib6_locate(&ip6_routing_table,
987                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
988                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
989         
990         if (fn) {
991                 for (rt = fn->leaf; rt; rt = rt->u.next) {
992                         if (rtmsg->rtmsg_ifindex &&
993                             (rt->rt6i_dev == NULL ||
994                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
995                                 continue;
996                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
997                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
998                                 continue;
999                         if (rtmsg->rtmsg_metric &&
1000                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1001                                 continue;
1002                         dst_hold(&rt->u.dst);
1003                         read_unlock_bh(&rt6_lock);
1004
1005                         return ip6_del_rt(rt, nlh, _rtattr, req);
1006                 }
1007         }
1008         read_unlock_bh(&rt6_lock);
1009
1010         return err;
1011 }
1012
1013 /*
1014  *      Handle redirects
1015  */
1016 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1017                   struct neighbour *neigh, u8 *lladdr, int on_link)
1018 {
1019         struct rt6_info *rt, *nrt;
1020
1021         /* Locate old route to this destination. */
1022         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1023
1024         if (rt == NULL)
1025                 return;
1026
1027         if (neigh->dev != rt->rt6i_dev)
1028                 goto out;
1029
1030         /*
1031          * Current route is on-link; redirect is always invalid.
1032          * 
1033          * Seems, previous statement is not true. It could
1034          * be node, which looks for us as on-link (f.e. proxy ndisc)
1035          * But then router serving it might decide, that we should
1036          * know truth 8)8) --ANK (980726).
1037          */
1038         if (!(rt->rt6i_flags&RTF_GATEWAY))
1039                 goto out;
1040
1041         /*
1042          *      RFC 2461 specifies that redirects should only be
1043          *      accepted if they come from the nexthop to the target.
1044          *      Due to the way default routers are chosen, this notion
1045          *      is a bit fuzzy and one might need to check all default
1046          *      routers.
1047          */
1048         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1049                 if (rt->rt6i_flags & RTF_DEFAULT) {
1050                         struct rt6_info *rt1;
1051
1052                         read_lock(&rt6_lock);
1053                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1054                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1055                                         dst_hold(&rt1->u.dst);
1056                                         dst_release(&rt->u.dst);
1057                                         read_unlock(&rt6_lock);
1058                                         rt = rt1;
1059                                         goto source_ok;
1060                                 }
1061                         }
1062                         read_unlock(&rt6_lock);
1063                 }
1064                 if (net_ratelimit())
1065                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1066                                "for redirect target\n");
1067                 goto out;
1068         }
1069
1070 source_ok:
1071
1072         /*
1073          *      We have finally decided to accept it.
1074          */
1075
1076         neigh_update(neigh, lladdr, NUD_STALE, 
1077                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1078                      NEIGH_UPDATE_F_OVERRIDE|
1079                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1080                                      NEIGH_UPDATE_F_ISROUTER))
1081                      );
1082
1083         /*
1084          * Redirect received -> path was valid.
1085          * Look, redirects are sent only in response to data packets,
1086          * so that this nexthop apparently is reachable. --ANK
1087          */
1088         dst_confirm(&rt->u.dst);
1089
1090         /* Duplicate redirect: silently ignore. */
1091         if (neigh == rt->u.dst.neighbour)
1092                 goto out;
1093
1094         nrt = ip6_rt_copy(rt);
1095         if (nrt == NULL)
1096                 goto out;
1097
1098         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1099         if (on_link)
1100                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1101
1102         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1103         nrt->rt6i_dst.plen = 128;
1104         nrt->u.dst.flags |= DST_HOST;
1105
1106         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1107         nrt->rt6i_nexthop = neigh_clone(neigh);
1108         /* Reset pmtu, it may be better */
1109         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1110         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1111
1112         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1113                 goto out;
1114
1115         if (rt->rt6i_flags&RTF_CACHE) {
1116                 ip6_del_rt(rt, NULL, NULL, NULL);
1117                 return;
1118         }
1119
1120 out:
1121         dst_release(&rt->u.dst);
1122         return;
1123 }
1124
1125 /*
1126  *      Handle ICMP "packet too big" messages
1127  *      i.e. Path MTU discovery
1128  */
1129
1130 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1131                         struct net_device *dev, u32 pmtu)
1132 {
1133         struct rt6_info *rt, *nrt;
1134         int allfrag = 0;
1135
1136         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1137         if (rt == NULL)
1138                 return;
1139
1140         if (pmtu >= dst_mtu(&rt->u.dst))
1141                 goto out;
1142
1143         if (pmtu < IPV6_MIN_MTU) {
1144                 /*
1145                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1146                  * MTU (1280) and a fragment header should always be included
1147                  * after a node receiving Too Big message reporting PMTU is
1148                  * less than the IPv6 Minimum Link MTU.
1149                  */
1150                 pmtu = IPV6_MIN_MTU;
1151                 allfrag = 1;
1152         }
1153
1154         /* New mtu received -> path was valid.
1155            They are sent only in response to data packets,
1156            so that this nexthop apparently is reachable. --ANK
1157          */
1158         dst_confirm(&rt->u.dst);
1159
1160         /* Host route. If it is static, it would be better
1161            not to override it, but add new one, so that
1162            when cache entry will expire old pmtu
1163            would return automatically.
1164          */
1165         if (rt->rt6i_flags & RTF_CACHE) {
1166                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1167                 if (allfrag)
1168                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1169                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1170                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1171                 goto out;
1172         }
1173
1174         /* Network route.
1175            Two cases are possible:
1176            1. It is connected route. Action: COW
1177            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1178          */
1179         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1180                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1181         else
1182                 nrt = rt6_alloc_clone(rt, daddr);
1183
1184         if (nrt) {
1185                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1186                 if (allfrag)
1187                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1188
1189                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1190                  * happened within 5 mins, the recommended timer is 10 mins.
1191                  * Here this route expiration time is set to ip6_rt_mtu_expires
1192                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1193                  * and detecting PMTU increase will be automatically happened.
1194                  */
1195                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1196                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1197
1198                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1199         }
1200 out:
1201         dst_release(&rt->u.dst);
1202 }
1203
1204 /*
1205  *      Misc support functions
1206  */
1207
1208 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1209 {
1210         struct rt6_info *rt = ip6_dst_alloc();
1211
1212         if (rt) {
1213                 rt->u.dst.input = ort->u.dst.input;
1214                 rt->u.dst.output = ort->u.dst.output;
1215
1216                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1217                 rt->u.dst.dev = ort->u.dst.dev;
1218                 if (rt->u.dst.dev)
1219                         dev_hold(rt->u.dst.dev);
1220                 rt->rt6i_idev = ort->rt6i_idev;
1221                 if (rt->rt6i_idev)
1222                         in6_dev_hold(rt->rt6i_idev);
1223                 rt->u.dst.lastuse = jiffies;
1224                 rt->rt6i_expires = 0;
1225
1226                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1227                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1228                 rt->rt6i_metric = 0;
1229
1230                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1231 #ifdef CONFIG_IPV6_SUBTREES
1232                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1233 #endif
1234         }
1235         return rt;
1236 }
1237
1238 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1239 {       
1240         struct rt6_info *rt;
1241         struct fib6_node *fn;
1242
1243         fn = &ip6_routing_table;
1244
1245         write_lock_bh(&rt6_lock);
1246         for (rt = fn->leaf; rt; rt=rt->u.next) {
1247                 if (dev == rt->rt6i_dev &&
1248                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1249                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1250                         break;
1251         }
1252         if (rt)
1253                 dst_hold(&rt->u.dst);
1254         write_unlock_bh(&rt6_lock);
1255         return rt;
1256 }
1257
1258 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1259                                      struct net_device *dev)
1260 {
1261         struct in6_rtmsg rtmsg;
1262
1263         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1264         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1265         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1266         rtmsg.rtmsg_metric = 1024;
1267         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1268
1269         rtmsg.rtmsg_ifindex = dev->ifindex;
1270
1271         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1272         return rt6_get_dflt_router(gwaddr, dev);
1273 }
1274
1275 void rt6_purge_dflt_routers(void)
1276 {
1277         struct rt6_info *rt;
1278
1279 restart:
1280         read_lock_bh(&rt6_lock);
1281         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1282                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1283                         dst_hold(&rt->u.dst);
1284
1285                         read_unlock_bh(&rt6_lock);
1286
1287                         ip6_del_rt(rt, NULL, NULL, NULL);
1288
1289                         goto restart;
1290                 }
1291         }
1292         read_unlock_bh(&rt6_lock);
1293 }
1294
1295 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1296 {
1297         struct in6_rtmsg rtmsg;
1298         int err;
1299
1300         switch(cmd) {
1301         case SIOCADDRT:         /* Add a route */
1302         case SIOCDELRT:         /* Delete a route */
1303                 if (!capable(CAP_NET_ADMIN))
1304                         return -EPERM;
1305                 err = copy_from_user(&rtmsg, arg,
1306                                      sizeof(struct in6_rtmsg));
1307                 if (err)
1308                         return -EFAULT;
1309                         
1310                 rtnl_lock();
1311                 switch (cmd) {
1312                 case SIOCADDRT:
1313                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1314                         break;
1315                 case SIOCDELRT:
1316                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1317                         break;
1318                 default:
1319                         err = -EINVAL;
1320                 }
1321                 rtnl_unlock();
1322
1323                 return err;
1324         };
1325
1326         return -EINVAL;
1327 }
1328
1329 /*
1330  *      Drop the packet on the floor
1331  */
1332
1333 static int ip6_pkt_discard(struct sk_buff *skb)
1334 {
1335         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1336         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1337         kfree_skb(skb);
1338         return 0;
1339 }
1340
1341 static int ip6_pkt_discard_out(struct sk_buff *skb)
1342 {
1343         skb->dev = skb->dst->dev;
1344         return ip6_pkt_discard(skb);
1345 }
1346
1347 /*
1348  *      Allocate a dst for local (unicast / anycast) address.
1349  */
1350
1351 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1352                                     const struct in6_addr *addr,
1353                                     int anycast)
1354 {
1355         struct rt6_info *rt = ip6_dst_alloc();
1356
1357         if (rt == NULL)
1358                 return ERR_PTR(-ENOMEM);
1359
1360         dev_hold(&loopback_dev);
1361         in6_dev_hold(idev);
1362
1363         rt->u.dst.flags = DST_HOST;
1364         rt->u.dst.input = ip6_input;
1365         rt->u.dst.output = ip6_output;
1366         rt->rt6i_dev = &loopback_dev;
1367         rt->rt6i_idev = idev;
1368         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1369         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1370         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1371         rt->u.dst.obsolete = -1;
1372
1373         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1374         if (anycast)
1375                 rt->rt6i_flags |= RTF_ANYCAST;
1376         else
1377                 rt->rt6i_flags |= RTF_LOCAL;
1378         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1379         if (rt->rt6i_nexthop == NULL) {
1380                 dst_free((struct dst_entry *) rt);
1381                 return ERR_PTR(-ENOMEM);
1382         }
1383
1384         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1385         rt->rt6i_dst.plen = 128;
1386
1387         atomic_set(&rt->u.dst.__refcnt, 1);
1388
1389         return rt;
1390 }
1391
1392 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1393 {
1394         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1395             rt != &ip6_null_entry) {
1396                 RT6_TRACE("deleted by ifdown %p\n", rt);
1397                 return -1;
1398         }
1399         return 0;
1400 }
1401
1402 void rt6_ifdown(struct net_device *dev)
1403 {
1404         write_lock_bh(&rt6_lock);
1405         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1406         write_unlock_bh(&rt6_lock);
1407 }
1408
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1414
1415 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1416 {
1417         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1418         struct inet6_dev *idev;
1419
1420         /* In IPv6 pmtu discovery is not optional,
1421            so that RTAX_MTU lock cannot disable it.
1422            We still use this lock to block changes
1423            caused by addrconf/ndisc.
1424         */
1425
1426         idev = __in6_dev_get(arg->dev);
1427         if (idev == NULL)
1428                 return 0;
1429
1430         /* For administrative MTU increase, there is no way to discover
1431            IPv6 PMTU increase, so PMTU increase should be updated here.
1432            Since RFC 1981 doesn't include administrative MTU increase
1433            update PMTU increase is a MUST. (i.e. jumbo frame)
1434          */
1435         /*
1436            If new MTU is less than route PMTU, this new MTU will be the
1437            lowest MTU in the path, update the route PMTU to reflect PMTU
1438            decreases; if new MTU is greater than route PMTU, and the
1439            old MTU is the lowest MTU in the path, update the route PMTU
1440            to reflect the increase. In this case if the other nodes' MTU
1441            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1442            PMTU discouvery.
1443          */
1444         if (rt->rt6i_dev == arg->dev &&
1445             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1446             (dst_mtu(&rt->u.dst) > arg->mtu ||
1447              (dst_mtu(&rt->u.dst) < arg->mtu &&
1448               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1449                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1450         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1451         return 0;
1452 }
1453
1454 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1455 {
1456         struct rt6_mtu_change_arg arg;
1457
1458         arg.dev = dev;
1459         arg.mtu = mtu;
1460         read_lock_bh(&rt6_lock);
1461         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1462         read_unlock_bh(&rt6_lock);
1463 }
1464
1465 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1466                               struct in6_rtmsg *rtmsg)
1467 {
1468         memset(rtmsg, 0, sizeof(*rtmsg));
1469
1470         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1471         rtmsg->rtmsg_src_len = r->rtm_src_len;
1472         rtmsg->rtmsg_flags = RTF_UP;
1473         if (r->rtm_type == RTN_UNREACHABLE)
1474                 rtmsg->rtmsg_flags |= RTF_REJECT;
1475
1476         if (rta[RTA_GATEWAY-1]) {
1477                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1478                         return -EINVAL;
1479                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1480                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1481         }
1482         if (rta[RTA_DST-1]) {
1483                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1484                         return -EINVAL;
1485                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1486         }
1487         if (rta[RTA_SRC-1]) {
1488                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1489                         return -EINVAL;
1490                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1491         }
1492         if (rta[RTA_OIF-1]) {
1493                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1494                         return -EINVAL;
1495                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1496         }
1497         if (rta[RTA_PRIORITY-1]) {
1498                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1499                         return -EINVAL;
1500                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1501         }
1502         return 0;
1503 }
1504
1505 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1506 {
1507         struct rtmsg *r = NLMSG_DATA(nlh);
1508         struct in6_rtmsg rtmsg;
1509
1510         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1511                 return -EINVAL;
1512         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1513 }
1514
1515 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1516 {
1517         struct rtmsg *r = NLMSG_DATA(nlh);
1518         struct in6_rtmsg rtmsg;
1519
1520         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1521                 return -EINVAL;
1522         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1523 }
1524
/* Per-call state passed through the FIB walker to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
        struct sk_buff *skb;            /* buffer the dump is written into */
        struct netlink_callback *cb;    /* netlink dump callback in progress */
};
1530
1531 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1532                          struct in6_addr *dst, struct in6_addr *src,
1533                          int iif, int type, u32 pid, u32 seq,
1534                          int prefix, unsigned int flags)
1535 {
1536         struct rtmsg *rtm;
1537         struct nlmsghdr  *nlh;
1538         unsigned char    *b = skb->tail;
1539         struct rta_cacheinfo ci;
1540
1541         if (prefix) {   /* user wants prefix routes only */
1542                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1543                         /* success since this is not a prefix route */
1544                         return 1;
1545                 }
1546         }
1547
1548         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1549         rtm = NLMSG_DATA(nlh);
1550         rtm->rtm_family = AF_INET6;
1551         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1552         rtm->rtm_src_len = rt->rt6i_src.plen;
1553         rtm->rtm_tos = 0;
1554         rtm->rtm_table = RT_TABLE_MAIN;
1555         if (rt->rt6i_flags&RTF_REJECT)
1556                 rtm->rtm_type = RTN_UNREACHABLE;
1557         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1558                 rtm->rtm_type = RTN_LOCAL;
1559         else
1560                 rtm->rtm_type = RTN_UNICAST;
1561         rtm->rtm_flags = 0;
1562         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1563         rtm->rtm_protocol = rt->rt6i_protocol;
1564         if (rt->rt6i_flags&RTF_DYNAMIC)
1565                 rtm->rtm_protocol = RTPROT_REDIRECT;
1566         else if (rt->rt6i_flags & RTF_ADDRCONF)
1567                 rtm->rtm_protocol = RTPROT_KERNEL;
1568         else if (rt->rt6i_flags&RTF_DEFAULT)
1569                 rtm->rtm_protocol = RTPROT_RA;
1570
1571         if (rt->rt6i_flags&RTF_CACHE)
1572                 rtm->rtm_flags |= RTM_F_CLONED;
1573
1574         if (dst) {
1575                 RTA_PUT(skb, RTA_DST, 16, dst);
1576                 rtm->rtm_dst_len = 128;
1577         } else if (rtm->rtm_dst_len)
1578                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1579 #ifdef CONFIG_IPV6_SUBTREES
1580         if (src) {
1581                 RTA_PUT(skb, RTA_SRC, 16, src);
1582                 rtm->rtm_src_len = 128;
1583         } else if (rtm->rtm_src_len)
1584                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1585 #endif
1586         if (iif)
1587                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1588         else if (dst) {
1589                 struct in6_addr saddr_buf;
1590                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1591                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1592         }
1593         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1594                 goto rtattr_failure;
1595         if (rt->u.dst.neighbour)
1596                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1597         if (rt->u.dst.dev)
1598                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1599         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1600         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1601         if (rt->rt6i_expires)
1602                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1603         else
1604                 ci.rta_expires = 0;
1605         ci.rta_used = rt->u.dst.__use;
1606         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1607         ci.rta_error = rt->u.dst.error;
1608         ci.rta_id = 0;
1609         ci.rta_ts = 0;
1610         ci.rta_tsage = 0;
1611         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1612         nlh->nlmsg_len = skb->tail - b;
1613         return skb->len;
1614
1615 nlmsg_failure:
1616 rtattr_failure:
1617         skb_trim(skb, b - skb->data);
1618         return -1;
1619 }
1620
1621 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1622 {
1623         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1624         int prefix;
1625
1626         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1627                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1628                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1629         } else
1630                 prefix = 0;
1631
1632         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1633                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1634                      prefix, NLM_F_MULTI);
1635 }
1636
1637 static int fib6_dump_node(struct fib6_walker_t *w)
1638 {
1639         int res;
1640         struct rt6_info *rt;
1641
1642         for (rt = w->leaf; rt; rt = rt->u.next) {
1643                 res = rt6_dump_route(rt, w->args);
1644                 if (res < 0) {
1645                         /* Frame is full, suspend walking */
1646                         w->leaf = rt;
1647                         return 1;
1648                 }
1649                 BUG_TRAP(res!=0);
1650         }
1651         w->leaf = NULL;
1652         return 0;
1653 }
1654
1655 static void fib6_dump_end(struct netlink_callback *cb)
1656 {
1657         struct fib6_walker_t *w = (void*)cb->args[0];
1658
1659         if (w) {
1660                 cb->args[0] = 0;
1661                 fib6_walker_unlink(w);
1662                 kfree(w);
1663         }
1664         cb->done = (void*)cb->args[1];
1665         cb->args[1] = 0;
1666 }
1667
1668 static int fib6_dump_done(struct netlink_callback *cb)
1669 {
1670         fib6_dump_end(cb);
1671         return cb->done ? cb->done(cb) : 0;
1672 }
1673
1674 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1675 {
1676         struct rt6_rtnl_dump_arg arg;
1677         struct fib6_walker_t *w;
1678         int res;
1679
1680         arg.skb = skb;
1681         arg.cb = cb;
1682
1683         w = (void*)cb->args[0];
1684         if (w == NULL) {
1685                 /* New dump:
1686                  * 
1687                  * 1. hook callback destructor.
1688                  */
1689                 cb->args[1] = (long)cb->done;
1690                 cb->done = fib6_dump_done;
1691
1692                 /*
1693                  * 2. allocate and initialize walker.
1694                  */
1695                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1696                 if (w == NULL)
1697                         return -ENOMEM;
1698                 RT6_TRACE("dump<%p", w);
1699                 memset(w, 0, sizeof(*w));
1700                 w->root = &ip6_routing_table;
1701                 w->func = fib6_dump_node;
1702                 w->args = &arg;
1703                 cb->args[0] = (long)w;
1704                 read_lock_bh(&rt6_lock);
1705                 res = fib6_walk(w);
1706                 read_unlock_bh(&rt6_lock);
1707         } else {
1708                 w->args = &arg;
1709                 read_lock_bh(&rt6_lock);
1710                 res = fib6_walk_continue(w);
1711                 read_unlock_bh(&rt6_lock);
1712         }
1713 #if RT6_DEBUG >= 3
1714         if (res <= 0 && skb->len == 0)
1715                 RT6_TRACE("%p>dump end\n", w);
1716 #endif
1717         res = res < 0 ? res : skb->len;
1718         /* res < 0 is an error. (really, impossible)
1719            res == 0 means that dump is complete, but skb still can contain data.
1720            res > 0 dump is not complete, but frame is full.
1721          */
1722         /* Destroy walker, if dump of this table is complete. */
1723         if (res <= 0)
1724                 fib6_dump_end(cb);
1725         return res;
1726 }
1727
1728 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1729 {
1730         struct rtattr **rta = arg;
1731         int iif = 0;
1732         int err = -ENOBUFS;
1733         struct sk_buff *skb;
1734         struct flowi fl;
1735         struct rt6_info *rt;
1736
1737         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1738         if (skb == NULL)
1739                 goto out;
1740
1741         /* Reserve room for dummy headers, this skb can pass
1742            through good chunk of routing engine.
1743          */
1744         skb->mac.raw = skb->data;
1745         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1746
1747         memset(&fl, 0, sizeof(fl));
1748         if (rta[RTA_SRC-1])
1749                 ipv6_addr_copy(&fl.fl6_src,
1750                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1751         if (rta[RTA_DST-1])
1752                 ipv6_addr_copy(&fl.fl6_dst,
1753                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1754
1755         if (rta[RTA_IIF-1])
1756                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1757
1758         if (iif) {
1759                 struct net_device *dev;
1760                 dev = __dev_get_by_index(iif);
1761                 if (!dev) {
1762                         err = -ENODEV;
1763                         goto out_free;
1764                 }
1765         }
1766
1767         fl.oif = 0;
1768         if (rta[RTA_OIF-1])
1769                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1770
1771         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1772
1773         skb->dst = &rt->u.dst;
1774
1775         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1776         err = rt6_fill_node(skb, rt, 
1777                             &fl.fl6_dst, &fl.fl6_src,
1778                             iif,
1779                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1780                             nlh->nlmsg_seq, 0, 0);
1781         if (err < 0) {
1782                 err = -EMSGSIZE;
1783                 goto out_free;
1784         }
1785
1786         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1787         if (err > 0)
1788                 err = 0;
1789 out:
1790         return err;
1791 out_free:
1792         kfree_skb(skb);
1793         goto out;       
1794 }
1795
1796 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1797                         struct netlink_skb_parms *req)
1798 {
1799         struct sk_buff *skb;
1800         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1801         u32 pid = current->pid;
1802         u32 seq = 0;
1803
1804         if (req)
1805                 pid = req->pid;
1806         if (nlh)
1807                 seq = nlh->nlmsg_seq;
1808         
1809         skb = alloc_skb(size, gfp_any());
1810         if (!skb) {
1811                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1812                 return;
1813         }
1814         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1815                 kfree_skb(skb);
1816                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1817                 return;
1818         }
1819         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1820         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1821 }
1822
1823 /*
1824  *      /proc
1825  */
1826
1827 #ifdef CONFIG_PROC_FS
1828
/*
 * Nominal length in bytes of one record of the ipv6_route proc output,
 * used to translate a read offset into a number of records to skip.
 * Grouped as: destination (32 hex digits + 4 for " xx "), source
 * (32 + 4), next hop (32), and 46 for the trailing
 * " %08x %08x %08x %08x %8s\n" part as printed by rt6_info_route().
 */
#define RT6_INFO_LEN ((32 + 4) + (32 + 4) + 32 + (40 + 5 + 1))
1830
/*
 * State shared between rt6_proc_info() and the per-route callback
 * rt6_info_route() while the ipv6_route proc output is generated.
 */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* byte count requested by the caller */
	int skip;	/* records skipped so far to reach 'offset' */
	int len;	/* bytes written into 'buffer' so far */
};
1839
1840 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1841 {
1842         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1843         int i;
1844
1845         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1846                 arg->skip++;
1847                 return 0;
1848         }
1849
1850         if (arg->len >= arg->length)
1851                 return 0;
1852
1853         for (i=0; i<16; i++) {
1854                 sprintf(arg->buffer + arg->len, "%02x",
1855                         rt->rt6i_dst.addr.s6_addr[i]);
1856                 arg->len += 2;
1857         }
1858         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1859                             rt->rt6i_dst.plen);
1860
1861 #ifdef CONFIG_IPV6_SUBTREES
1862         for (i=0; i<16; i++) {
1863                 sprintf(arg->buffer + arg->len, "%02x",
1864                         rt->rt6i_src.addr.s6_addr[i]);
1865                 arg->len += 2;
1866         }
1867         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1868                             rt->rt6i_src.plen);
1869 #else
1870         sprintf(arg->buffer + arg->len,
1871                 "00000000000000000000000000000000 00 ");
1872         arg->len += 36;
1873 #endif
1874
1875         if (rt->rt6i_nexthop) {
1876                 for (i=0; i<16; i++) {
1877                         sprintf(arg->buffer + arg->len, "%02x",
1878                                 rt->rt6i_nexthop->primary_key[i]);
1879                         arg->len += 2;
1880                 }
1881         } else {
1882                 sprintf(arg->buffer + arg->len,
1883                         "00000000000000000000000000000000");
1884                 arg->len += 32;
1885         }
1886         arg->len += sprintf(arg->buffer + arg->len,
1887                             " %08x %08x %08x %08x %8s\n",
1888                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1889                             rt->u.dst.__use, rt->rt6i_flags, 
1890                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1891         return 0;
1892 }
1893
1894 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1895 {
1896         struct rt6_proc_arg arg;
1897         arg.buffer = buffer;
1898         arg.offset = offset;
1899         arg.length = length;
1900         arg.skip = 0;
1901         arg.len = 0;
1902
1903         read_lock_bh(&rt6_lock);
1904         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1905         read_unlock_bh(&rt6_lock);
1906
1907         *start = buffer;
1908         if (offset)
1909                 *start += offset % RT6_INFO_LEN;
1910
1911         arg.len -= offset % RT6_INFO_LEN;
1912
1913         if (arg.len > length)
1914                 arg.len = length;
1915         if (arg.len < 0)
1916                 arg.len = 0;
1917
1918         return arg.len;
1919 }
1920
1921 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1922 {
1923         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1924                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1925                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1926                       rt6_stats.fib_rt_cache,
1927                       atomic_read(&ip6_dst_ops.entries),
1928                       rt6_stats.fib_discarded_routes);
1929
1930         return 0;
1931 }
1932
1933 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1934 {
1935         return single_open(file, rt6_stats_seq_show, NULL);
1936 }
1937
1938 static struct file_operations rt6_stats_seq_fops = {
1939         .owner   = THIS_MODULE,
1940         .open    = rt6_stats_seq_open,
1941         .read    = seq_read,
1942         .llseek  = seq_lseek,
1943         .release = single_release,
1944 };
1945 #endif  /* CONFIG_PROC_FS */
1946
1947 #ifdef CONFIG_SYSCTL
1948
1949 static int flush_delay;
1950
1951 static
1952 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1953                               void __user *buffer, size_t *lenp, loff_t *ppos)
1954 {
1955         if (write) {
1956                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1957                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1958                 return 0;
1959         } else
1960                 return -EINVAL;
1961 }
1962
1963 ctl_table ipv6_route_table[] = {
1964         {
1965                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1966                 .procname       =       "flush",
1967                 .data           =       &flush_delay,
1968                 .maxlen         =       sizeof(int),
1969                 .mode           =       0200,
1970                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1971         },
1972         {
1973                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1974                 .procname       =       "gc_thresh",
1975                 .data           =       &ip6_dst_ops.gc_thresh,
1976                 .maxlen         =       sizeof(int),
1977                 .mode           =       0644,
1978                 .proc_handler   =       &proc_dointvec,
1979         },
1980         {
1981                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1982                 .procname       =       "max_size",
1983                 .data           =       &ip6_rt_max_size,
1984                 .maxlen         =       sizeof(int),
1985                 .mode           =       0644,
1986                 .proc_handler   =       &proc_dointvec,
1987         },
1988         {
1989                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1990                 .procname       =       "gc_min_interval",
1991                 .data           =       &ip6_rt_gc_min_interval,
1992                 .maxlen         =       sizeof(int),
1993                 .mode           =       0644,
1994                 .proc_handler   =       &proc_dointvec_jiffies,
1995                 .strategy       =       &sysctl_jiffies,
1996         },
1997         {
1998                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1999                 .procname       =       "gc_timeout",
2000                 .data           =       &ip6_rt_gc_timeout,
2001                 .maxlen         =       sizeof(int),
2002                 .mode           =       0644,
2003                 .proc_handler   =       &proc_dointvec_jiffies,
2004                 .strategy       =       &sysctl_jiffies,
2005         },
2006         {
2007                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2008                 .procname       =       "gc_interval",
2009                 .data           =       &ip6_rt_gc_interval,
2010                 .maxlen         =       sizeof(int),
2011                 .mode           =       0644,
2012                 .proc_handler   =       &proc_dointvec_jiffies,
2013                 .strategy       =       &sysctl_jiffies,
2014         },
2015         {
2016                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2017                 .procname       =       "gc_elasticity",
2018                 .data           =       &ip6_rt_gc_elasticity,
2019                 .maxlen         =       sizeof(int),
2020                 .mode           =       0644,
2021                 .proc_handler   =       &proc_dointvec_jiffies,
2022                 .strategy       =       &sysctl_jiffies,
2023         },
2024         {
2025                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2026                 .procname       =       "mtu_expires",
2027                 .data           =       &ip6_rt_mtu_expires,
2028                 .maxlen         =       sizeof(int),
2029                 .mode           =       0644,
2030                 .proc_handler   =       &proc_dointvec_jiffies,
2031                 .strategy       =       &sysctl_jiffies,
2032         },
2033         {
2034                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2035                 .procname       =       "min_adv_mss",
2036                 .data           =       &ip6_rt_min_advmss,
2037                 .maxlen         =       sizeof(int),
2038                 .mode           =       0644,
2039                 .proc_handler   =       &proc_dointvec_jiffies,
2040                 .strategy       =       &sysctl_jiffies,
2041         },
2042         {
2043                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2044                 .procname       =       "gc_min_interval_ms",
2045                 .data           =       &ip6_rt_gc_min_interval,
2046                 .maxlen         =       sizeof(int),
2047                 .mode           =       0644,
2048                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2049                 .strategy       =       &sysctl_ms_jiffies,
2050         },
2051         { .ctl_name = 0 }
2052 };
2053
2054 #endif
2055
2056 void __init ip6_route_init(void)
2057 {
2058         struct proc_dir_entry *p;
2059
2060         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2061                                                      sizeof(struct rt6_info),
2062                                                      0, SLAB_HWCACHE_ALIGN,
2063                                                      NULL, NULL);
2064         if (!ip6_dst_ops.kmem_cachep)
2065                 panic("cannot create ip6_dst_cache");
2066
2067         fib6_init();
2068 #ifdef  CONFIG_PROC_FS
2069         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2070         if (p)
2071                 p->owner = THIS_MODULE;
2072
2073         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2074 #endif
2075 #ifdef CONFIG_XFRM
2076         xfrm6_init();
2077 #endif
2078 }
2079
2080 void ip6_route_cleanup(void)
2081 {
2082 #ifdef CONFIG_PROC_FS
2083         proc_net_remove("ipv6_route");
2084         proc_net_remove("rt6_stats");
2085 #endif
2086 #ifdef CONFIG_XFRM
2087         xfrm6_fini();
2088 #endif
2089         rt6_ifdown(NULL);
2090         fib6_gc_cleanup();
2091         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2092 }