]> err.no Git - linux-2.6/blob - net/ipv6/route.c
[IPV6]: ROUTE: Add router_probe_interval sysctl.
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  Below level 3 both RDBG() and
 * RT6_TRACE() compile to nothing; at level 3 and above they are
 * forwarded to printk().
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already have a nexthop; when zero such routes are returned as-is.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Selection flags passed as "strict" to rt6_select()/rt6_score_route(). */
#define RT6_SELECT_F_IFACE      0x1     /* reject routes on another interface */
#define RT6_SELECT_F_REACHABLE  0x2     /* reject routes whose neighbour is not NUD_VALID */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state, send a
 * Neighbour Solicitation to its solicited-node multicast address so
 * that its reachability gets re-evaluated.  @rt may be NULL, in which
 * case nothing happens.
 *
 * Probes MUST be rate-limited (the original note here says to no more
 * than one per minute); the actual interval comes from the per-device
 * cnf.rtr_probe_interval setting.
 *
 * neigh->updated doubles as the "last probe" timestamp.  It is tested
 * and rewritten under the *write* side of neigh->lock: with only the
 * read side held, two CPUs could both pass the interval test and both
 * send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
        struct in6_addr mcaddr;
        struct in6_addr *target;

        /* Cheap unlocked pre-check; repeated under the lock below. */
        if (!neigh || (neigh->nud_state & NUD_VALID))
                return;

        write_lock_bh(&neigh->lock);
        if ((neigh->nud_state & NUD_VALID) ||
            !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
                write_unlock_bh(&neigh->lock);
                return;
        }
        neigh->updated = jiffies;
        write_unlock_bh(&neigh->lock);

        /* Send the solicitation without holding the neighbour lock. */
        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
256
257 /*
258  * Default Router Selection (RFC 2461 6.3.6)
259  */
260 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
261 {
262         struct net_device *dev = rt->rt6i_dev;
263         if (!oif || dev->ifindex == oif)
264                 return 2;
265         if ((dev->flags & IFF_LOOPBACK) &&
266             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
267                 return 1;
268         return 0;
269 }
270
271 static int inline rt6_check_neigh(struct rt6_info *rt)
272 {
273         struct neighbour *neigh = rt->rt6i_nexthop;
274         int m = 0;
275         if (neigh) {
276                 read_lock_bh(&neigh->lock);
277                 if (neigh->nud_state & NUD_VALID)
278                         m = 1;
279                 read_unlock_bh(&neigh->lock);
280         }
281         return m;
282 }
283
284 static int rt6_score_route(struct rt6_info *rt, int oif,
285                            int strict)
286 {
287         int m = rt6_check_dev(rt, oif);
288         if (!m && (strict & RT6_SELECT_F_IFACE))
289                 return -1;
290 #ifdef CONFIG_IPV6_ROUTER_PREF
291         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
292 #endif
293         if (rt6_check_neigh(rt))
294                 m |= 16;
295         else if (strict & RT6_SELECT_F_REACHABLE)
296                 return -1;
297         return m;
298 }
299
300 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
301                                    int strict)
302 {
303         struct rt6_info *match = NULL, *last = NULL;
304         struct rt6_info *rt, *rt0 = *head;
305         u32 metric;
306         int mpri = -1;
307
308         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
309                   __FUNCTION__, head, head ? *head : NULL, oif);
310
311         for (rt = rt0, metric = rt0->rt6i_metric;
312              rt && rt->rt6i_metric == metric;
313              rt = rt->u.next) {
314                 int m;
315
316                 if (rt6_check_expired(rt))
317                         continue;
318
319                 last = rt;
320
321                 m = rt6_score_route(rt, oif, strict);
322                 if (m < 0)
323                         continue;
324
325                 if (m > mpri) {
326                         rt6_probe(match);
327                         match = rt;
328                         mpri = m;
329                 } else {
330                         rt6_probe(rt);
331                 }
332         }
333
334         if (!match &&
335             (strict & RT6_SELECT_F_REACHABLE) &&
336             last && last != rt0) {
337                 /* no entries matched; do round-robin */
338                 *head = rt0->u.next;
339                 rt0->u.next = last->u.next;
340                 last->u.next = rt0;
341         }
342
343         RT6_TRACE("%s() => %p, score=%d\n",
344                   __FUNCTION__, match, mpri);
345
346         return (match ? match : &ip6_null_entry);
347 }
348
349 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
350                             int oif, int strict)
351 {
352         struct fib6_node *fn;
353         struct rt6_info *rt;
354
355         read_lock_bh(&rt6_lock);
356         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
357         rt = rt6_device_match(fn->leaf, oif, strict);
358         dst_hold(&rt->u.dst);
359         rt->u.dst.__use++;
360         read_unlock_bh(&rt6_lock);
361
362         rt->u.dst.lastuse = jiffies;
363         if (rt->u.dst.error == 0)
364                 return rt;
365         dst_release(&rt->u.dst);
366         return NULL;
367 }
368
369 /* ip6_ins_rt is called with FREE rt6_lock.
370    It takes new route entry, the addition fails by any reason the
371    route is freed. In any case, if caller does not hold it, it may
372    be destroyed.
373  */
374
375 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
376                 void *_rtattr, struct netlink_skb_parms *req)
377 {
378         int err;
379
380         write_lock_bh(&rt6_lock);
381         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
382         write_unlock_bh(&rt6_lock);
383
384         return err;
385 }
386
387 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
388                                       struct in6_addr *saddr)
389 {
390         struct rt6_info *rt;
391
392         /*
393          *      Clone the route.
394          */
395
396         rt = ip6_rt_copy(ort);
397
398         if (rt) {
399                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
400                         if (rt->rt6i_dst.plen != 128 &&
401                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
402                                 rt->rt6i_flags |= RTF_ANYCAST;
403                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
404                 }
405
406                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
407                 rt->rt6i_dst.plen = 128;
408                 rt->rt6i_flags |= RTF_CACHE;
409                 rt->u.dst.flags |= DST_HOST;
410
411 #ifdef CONFIG_IPV6_SUBTREES
412                 if (rt->rt6i_src.plen && saddr) {
413                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
414                         rt->rt6i_src.plen = 128;
415                 }
416 #endif
417
418                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
419
420         }
421
422         return rt;
423 }
424
425 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
426 {
427         struct rt6_info *rt = ip6_rt_copy(ort);
428         if (rt) {
429                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
430                 rt->rt6i_dst.plen = 128;
431                 rt->rt6i_flags |= RTF_CACHE;
432                 if (rt->rt6i_flags & RTF_REJECT)
433                         rt->u.dst.error = ort->u.dst.error;
434                 rt->u.dst.flags |= DST_HOST;
435                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
436         }
437         return rt;
438 }
439
/*
 * BACKTRACK() - climb towards the root after a failed route selection.
 *
 * Must be expanded in a function that provides:
 *   rt       the selection result (struct rt6_info *)
 *   fn       the current fib6 node (struct fib6_node *), updated in place
 *   out      label taken when a node flagged RTN_ROOT is reached
 *   restart  label taken when an ancestor carrying routes (RTN_RTINFO)
 *            is found, so that selection is retried on that node
 *
 * Nothing happens unless rt is &ip6_null_entry.  If the parent chain
 * ends without hitting either case, fn is left NULL and control falls
 * through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" behaves as exactly
 * one statement (safe inside an unbraced if/else).
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) \
                                goto out; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
450
451
/*
 * ip6_route_input - select a route for a received packet.
 *
 * Looks up the packet's destination/source addresses (taken from
 * skb->nh.ipv6h) in ip6_routing_table, using the receiving device's
 * ifindex as the interface hint, and stores the resulting dst in
 * skb->dst with a reference held.  When nothing matches, the result is
 * &ip6_null_entry.
 *
 * The lookup is first attempted with RT6_SELECT_F_REACHABLE set; every
 * path through "out" repeats it once without that flag before the
 * result is accepted.
 */
452 void ip6_route_input(struct sk_buff *skb)
453 {
454         struct fib6_node *fn;
455         struct rt6_info *rt, *nrt;
456         int strict;
/* upper bound on insert-and-relookup rounds */
457         int attempts = 3;
458         int err;
/* cleared at "out" for the second, unconstrained pass */
459         int reachable = RT6_SELECT_F_REACHABLE;
460
/* multicast and link-local destinations must match the interface */
461         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
462
463 relookup:
464         read_lock_bh(&rt6_lock);
465
466 restart_2:
467         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
468                          &skb->nh.ipv6h->saddr);
469
470 restart:
471         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
/* may jump to "restart" (retry on an ancestor node) or to "out" */
472         BACKTRACK();
/* nothing found, or an existing cache entry: no clone is needed */
473         if (rt == &ip6_null_entry ||
474             rt->rt6i_flags & RTF_CACHE)
475                 goto out;
476
/* pin the route before dropping the lock for the allocation below */
477         dst_hold(&rt->u.dst);
478         read_unlock_bh(&rt6_lock);
479
/* routes lacking a nexthop (and not RTF_NONEXTHOP) get a per-destination copy */
480         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
481                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
482         else {
483 #if CLONE_OFFLINK_ROUTE
484                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
485 #else
/* cloning disabled: use the route as found (reference already held) */
486                 goto out2;
487 #endif
488         }
489
490         dst_release(&rt->u.dst);
/* allocation failure falls back to the null entry */
491         rt = nrt ? : &ip6_null_entry;
492
493         dst_hold(&rt->u.dst);
494         if (nrt) {
495                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
496                 if (!err)
497                         goto out2;
498         }
499
/* out of retries: return what we have */
500         if (--attempts <= 0)
501                 goto out2;
502
503         /*
504          * Race condition! In the gap, when rt6_lock was
505          * released someone could insert this route.  Relookup.
506          */
507         dst_release(&rt->u.dst);
508         goto relookup;
509
/* reached with rt6_lock still read-held */
510 out:
511         if (reachable) {
512                 reachable = 0;
513                 goto restart_2;
514         }
515         dst_hold(&rt->u.dst);
516         read_unlock_bh(&rt6_lock);
/* reached with a reference on rt held and rt6_lock released */
517 out2:
518         rt->u.dst.lastuse = jiffies;
519         rt->u.dst.__use++;
520         skb->dst = (struct dst_entry *) rt;
521         return;
522 }
523
/*
 * ip6_route_output - select a route for a locally generated flow.
 *
 * Looks up fl->fl6_dst / fl->fl6_src in ip6_routing_table with fl->oif
 * as the interface hint and returns the resulting dst with a reference
 * held.  When nothing matches, the dst of &ip6_null_entry is returned.
 * @sk is not used in this function.
 *
 * The lookup is first attempted with RT6_SELECT_F_REACHABLE set; every
 * path through "out" repeats it once without that flag before the
 * result is accepted.
 */
524 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
525 {
526         struct fib6_node *fn;
527         struct rt6_info *rt, *nrt;
528         int strict;
/* upper bound on insert-and-relookup rounds */
529         int attempts = 3;
530         int err;
/* cleared at "out" for the second, unconstrained pass */
531         int reachable = RT6_SELECT_F_REACHABLE;
532
/* multicast and link-local destinations must match the interface */
533         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
534
535 relookup:
536         read_lock_bh(&rt6_lock);
537
538 restart_2:
539         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
540
541 restart:
542         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
/* may jump to "restart" (retry on an ancestor node) or to "out" */
543         BACKTRACK();
/* nothing found, or an existing cache entry: no clone is needed */
544         if (rt == &ip6_null_entry ||
545             rt->rt6i_flags & RTF_CACHE)
546                 goto out;
547
/* pin the route before dropping the lock for the allocation below */
548         dst_hold(&rt->u.dst);
549         read_unlock_bh(&rt6_lock);
550
/* routes lacking a nexthop (and not RTF_NONEXTHOP) get a per-destination copy */
551         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
552                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
553         else {
554 #if CLONE_OFFLINK_ROUTE
555                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
556 #else
/* cloning disabled: use the route as found (reference already held) */
557                 goto out2;
558 #endif
559         }
560
561         dst_release(&rt->u.dst);
/* allocation failure falls back to the null entry */
562         rt = nrt ? : &ip6_null_entry;
563
564         dst_hold(&rt->u.dst);
565         if (nrt) {
566                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
567                 if (!err)
568                         goto out2;
569         }
570
/* out of retries: return what we have */
571         if (--attempts <= 0)
572                 goto out2;
573
574         /*
575          * Race condition! In the gap, when rt6_lock was
576          * released someone could insert this route.  Relookup.
577          */
578         dst_release(&rt->u.dst);
579         goto relookup;
580
/* reached with rt6_lock still read-held */
581 out:
582         if (reachable) {
583                 reachable = 0;
584                 goto restart_2;
585         }
586         dst_hold(&rt->u.dst);
587         read_unlock_bh(&rt6_lock);
/* reached with a reference on rt held and rt6_lock released */
588 out2:
589         rt->u.dst.lastuse = jiffies;
590         rt->u.dst.__use++;
591         return &rt->u.dst;
592 }
593
594
595 /*
596  *      Destination cache support functions
597  */
598
599 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
600 {
601         struct rt6_info *rt;
602
603         rt = (struct rt6_info *) dst;
604
605         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
606                 return dst;
607
608         return NULL;
609 }
610
611 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
612 {
613         struct rt6_info *rt = (struct rt6_info *) dst;
614
615         if (rt) {
616                 if (rt->rt6i_flags & RTF_CACHE)
617                         ip6_del_rt(rt, NULL, NULL, NULL);
618                 else
619                         dst_release(dst);
620         }
621         return NULL;
622 }
623
624 static void ip6_link_failure(struct sk_buff *skb)
625 {
626         struct rt6_info *rt;
627
628         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
629
630         rt = (struct rt6_info *) skb->dst;
631         if (rt) {
632                 if (rt->rt6i_flags&RTF_CACHE) {
633                         dst_set_expires(&rt->u.dst, 0);
634                         rt->rt6i_flags |= RTF_EXPIRES;
635                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636                         rt->rt6i_node->fn_sernum = -1;
637         }
638 }
639
640 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
641 {
642         struct rt6_info *rt6 = (struct rt6_info*)dst;
643
644         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645                 rt6->rt6i_flags |= RTF_MODIFIED;
646                 if (mtu < IPV6_MIN_MTU) {
647                         mtu = IPV6_MIN_MTU;
648                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
649                 }
650                 dst->metrics[RTAX_MTU-1] = mtu;
651         }
652 }
653
654 /* Protected by rt6_lock.  */
655 static struct dst_entry *ndisc_dst_gc_list;
656 static int ipv6_get_mtu(struct net_device *dev);
657
658 static inline unsigned int ipv6_advmss(unsigned int mtu)
659 {
660         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
661
662         if (mtu < ip6_rt_min_advmss)
663                 mtu = ip6_rt_min_advmss;
664
665         /*
666          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
667          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
668          * IPV6_MAXPLEN is also valid and means: "any MSS, 
669          * rely only on pmtu discovery"
670          */
671         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
672                 mtu = IPV6_MAXPLEN;
673         return mtu;
674 }
675
676 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
677                                   struct neighbour *neigh,
678                                   struct in6_addr *addr,
679                                   int (*output)(struct sk_buff *))
680 {
681         struct rt6_info *rt;
682         struct inet6_dev *idev = in6_dev_get(dev);
683
684         if (unlikely(idev == NULL))
685                 return NULL;
686
687         rt = ip6_dst_alloc();
688         if (unlikely(rt == NULL)) {
689                 in6_dev_put(idev);
690                 goto out;
691         }
692
693         dev_hold(dev);
694         if (neigh)
695                 neigh_hold(neigh);
696         else
697                 neigh = ndisc_get_neigh(dev, addr);
698
699         rt->rt6i_dev      = dev;
700         rt->rt6i_idev     = idev;
701         rt->rt6i_nexthop  = neigh;
702         atomic_set(&rt->u.dst.__refcnt, 1);
703         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706         rt->u.dst.output  = output;
707
708 #if 0   /* there's no chance to use these for ndisc */
709         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
710                                 ? DST_HOST 
711                                 : 0;
712         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713         rt->rt6i_dst.plen = 128;
714 #endif
715
716         write_lock_bh(&rt6_lock);
717         rt->u.dst.next = ndisc_dst_gc_list;
718         ndisc_dst_gc_list = &rt->u.dst;
719         write_unlock_bh(&rt6_lock);
720
721         fib6_force_start_gc();
722
723 out:
724         return (struct dst_entry *)rt;
725 }
726
727 int ndisc_dst_gc(int *more)
728 {
729         struct dst_entry *dst, *next, **pprev;
730         int freed;
731
732         next = NULL;
733         pprev = &ndisc_dst_gc_list;
734         freed = 0;
735         while ((dst = *pprev) != NULL) {
736                 if (!atomic_read(&dst->__refcnt)) {
737                         *pprev = dst->next;
738                         dst_free(dst);
739                         freed++;
740                 } else {
741                         pprev = &dst->next;
742                         (*more)++;
743                 }
744         }
745
746         return freed;
747 }
748
749 static int ip6_dst_gc(void)
750 {
751         static unsigned expire = 30*HZ;
752         static unsigned long last_gc;
753         unsigned long now = jiffies;
754
755         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
757                 goto out;
758
759         expire++;
760         fib6_run_gc(expire);
761         last_gc = now;
762         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763                 expire = ip6_rt_gc_timeout>>1;
764
765 out:
766         expire -= expire>>ip6_rt_gc_elasticity;
767         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
768 }
769
770 /* Clean host part of a prefix. Not necessary in radix tree,
771    but results in cleaner routing tables.
772
773    Remove it only when all the things will work!
774  */
775
776 static int ipv6_get_mtu(struct net_device *dev)
777 {
778         int mtu = IPV6_MIN_MTU;
779         struct inet6_dev *idev;
780
781         idev = in6_dev_get(dev);
782         if (idev) {
783                 mtu = idev->cnf.mtu6;
784                 in6_dev_put(idev);
785         }
786         return mtu;
787 }
788
789 int ipv6_get_hoplimit(struct net_device *dev)
790 {
791         int hoplimit = ipv6_devconf.hop_limit;
792         struct inet6_dev *idev;
793
794         idev = in6_dev_get(dev);
795         if (idev) {
796                 hoplimit = idev->cnf.hop_limit;
797                 in6_dev_put(idev);
798         }
799         return hoplimit;
800 }
801
802 /*
803  *
804  */
805
806 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
807                 void *_rtattr, struct netlink_skb_parms *req)
808 {
809         int err;
810         struct rtmsg *r;
811         struct rtattr **rta;
812         struct rt6_info *rt = NULL;
813         struct net_device *dev = NULL;
814         struct inet6_dev *idev = NULL;
815         int addr_type;
816
817         rta = (struct rtattr **) _rtattr;
818
819         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
820                 return -EINVAL;
821 #ifndef CONFIG_IPV6_SUBTREES
822         if (rtmsg->rtmsg_src_len)
823                 return -EINVAL;
824 #endif
825         if (rtmsg->rtmsg_ifindex) {
826                 err = -ENODEV;
827                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
828                 if (!dev)
829                         goto out;
830                 idev = in6_dev_get(dev);
831                 if (!idev)
832                         goto out;
833         }
834
835         if (rtmsg->rtmsg_metric == 0)
836                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
837
838         rt = ip6_dst_alloc();
839
840         if (rt == NULL) {
841                 err = -ENOMEM;
842                 goto out;
843         }
844
845         rt->u.dst.obsolete = -1;
846         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
847         if (nlh && (r = NLMSG_DATA(nlh))) {
848                 rt->rt6i_protocol = r->rtm_protocol;
849         } else {
850                 rt->rt6i_protocol = RTPROT_BOOT;
851         }
852
853         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
854
855         if (addr_type & IPV6_ADDR_MULTICAST)
856                 rt->u.dst.input = ip6_mc_input;
857         else
858                 rt->u.dst.input = ip6_forward;
859
860         rt->u.dst.output = ip6_output;
861
862         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
863                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865         if (rt->rt6i_dst.plen == 128)
866                rt->u.dst.flags = DST_HOST;
867
868 #ifdef CONFIG_IPV6_SUBTREES
869         ipv6_addr_prefix(&rt->rt6i_src.addr, 
870                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
872 #endif
873
874         rt->rt6i_metric = rtmsg->rtmsg_metric;
875
876         /* We cannot add true routes via loopback here,
877            they would result in kernel looping; promote them to reject routes
878          */
879         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881                 /* hold loopback dev/idev if we haven't done so. */
882                 if (dev != &loopback_dev) {
883                         if (dev) {
884                                 dev_put(dev);
885                                 in6_dev_put(idev);
886                         }
887                         dev = &loopback_dev;
888                         dev_hold(dev);
889                         idev = in6_dev_get(dev);
890                         if (!idev) {
891                                 err = -ENODEV;
892                                 goto out;
893                         }
894                 }
895                 rt->u.dst.output = ip6_pkt_discard_out;
896                 rt->u.dst.input = ip6_pkt_discard;
897                 rt->u.dst.error = -ENETUNREACH;
898                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
899                 goto install_route;
900         }
901
902         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903                 struct in6_addr *gw_addr;
904                 int gwa_type;
905
906                 gw_addr = &rtmsg->rtmsg_gateway;
907                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908                 gwa_type = ipv6_addr_type(gw_addr);
909
910                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911                         struct rt6_info *grt;
912
913                         /* IPv6 strictly inhibits using not link-local
914                            addresses as nexthop address.
915                            Otherwise, router will not able to send redirects.
916                            It is very good, but in some (rare!) circumstances
917                            (SIT, PtP, NBMA NOARP links) it is handy to allow
918                            some exceptions. --ANK
919                          */
920                         err = -EINVAL;
921                         if (!(gwa_type&IPV6_ADDR_UNICAST))
922                                 goto out;
923
924                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
925
926                         err = -EHOSTUNREACH;
927                         if (grt == NULL)
928                                 goto out;
929                         if (dev) {
930                                 if (dev != grt->rt6i_dev) {
931                                         dst_release(&grt->u.dst);
932                                         goto out;
933                                 }
934                         } else {
935                                 dev = grt->rt6i_dev;
936                                 idev = grt->rt6i_idev;
937                                 dev_hold(dev);
938                                 in6_dev_hold(grt->rt6i_idev);
939                         }
940                         if (!(grt->rt6i_flags&RTF_GATEWAY))
941                                 err = 0;
942                         dst_release(&grt->u.dst);
943
944                         if (err)
945                                 goto out;
946                 }
947                 err = -EINVAL;
948                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
949                         goto out;
950         }
951
952         err = -ENODEV;
953         if (dev == NULL)
954                 goto out;
955
956         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958                 if (IS_ERR(rt->rt6i_nexthop)) {
959                         err = PTR_ERR(rt->rt6i_nexthop);
960                         rt->rt6i_nexthop = NULL;
961                         goto out;
962                 }
963         }
964
965         rt->rt6i_flags = rtmsg->rtmsg_flags;
966
967 install_route:
968         if (rta && rta[RTA_METRICS-1]) {
969                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
971
972                 while (RTA_OK(attr, attrlen)) {
973                         unsigned flavor = attr->rta_type;
974                         if (flavor) {
975                                 if (flavor > RTAX_MAX) {
976                                         err = -EINVAL;
977                                         goto out;
978                                 }
979                                 rt->u.dst.metrics[flavor-1] =
980                                         *(u32 *)RTA_DATA(attr);
981                         }
982                         attr = RTA_NEXT(attr, attrlen);
983                 }
984         }
985
986         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988         if (!rt->u.dst.metrics[RTAX_MTU-1])
989                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
992         rt->u.dst.dev = dev;
993         rt->rt6i_idev = idev;
994         return ip6_ins_rt(rt, nlh, _rtattr, req);
995
996 out:
997         if (dev)
998                 dev_put(dev);
999         if (idev)
1000                 in6_dev_put(idev);
1001         if (rt)
1002                 dst_free((struct dst_entry *) rt);
1003         return err;
1004 }
1005
1006 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1007 {
1008         int err;
1009
1010         write_lock_bh(&rt6_lock);
1011
1012         err = fib6_del(rt, nlh, _rtattr, req);
1013         dst_release(&rt->u.dst);
1014
1015         write_unlock_bh(&rt6_lock);
1016
1017         return err;
1018 }
1019
1020 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1021 {
1022         struct fib6_node *fn;
1023         struct rt6_info *rt;
1024         int err = -ESRCH;
1025
1026         read_lock_bh(&rt6_lock);
1027
1028         fn = fib6_locate(&ip6_routing_table,
1029                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1030                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1031         
1032         if (fn) {
1033                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1034                         if (rtmsg->rtmsg_ifindex &&
1035                             (rt->rt6i_dev == NULL ||
1036                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1037                                 continue;
1038                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1039                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1040                                 continue;
1041                         if (rtmsg->rtmsg_metric &&
1042                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1043                                 continue;
1044                         dst_hold(&rt->u.dst);
1045                         read_unlock_bh(&rt6_lock);
1046
1047                         return ip6_del_rt(rt, nlh, _rtattr, req);
1048                 }
1049         }
1050         read_unlock_bh(&rt6_lock);
1051
1052         return err;
1053 }
1054
1055 /*
1056  *      Handle redirects
1057  */
1058 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1059                   struct neighbour *neigh, u8 *lladdr, int on_link)
1060 {
1061         struct rt6_info *rt, *nrt;
1062
1063         /* Locate old route to this destination. */
1064         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1065
1066         if (rt == NULL)
1067                 return;
1068
1069         if (neigh->dev != rt->rt6i_dev)
1070                 goto out;
1071
1072         /*
1073          * Current route is on-link; redirect is always invalid.
1074          * 
1075          * Seems, previous statement is not true. It could
1076          * be node, which looks for us as on-link (f.e. proxy ndisc)
1077          * But then router serving it might decide, that we should
1078          * know truth 8)8) --ANK (980726).
1079          */
1080         if (!(rt->rt6i_flags&RTF_GATEWAY))
1081                 goto out;
1082
1083         /*
1084          *      RFC 2461 specifies that redirects should only be
1085          *      accepted if they come from the nexthop to the target.
1086          *      Due to the way default routers are chosen, this notion
1087          *      is a bit fuzzy and one might need to check all default
1088          *      routers.
1089          */
1090         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1091                 if (rt->rt6i_flags & RTF_DEFAULT) {
1092                         struct rt6_info *rt1;
1093
1094                         read_lock(&rt6_lock);
1095                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1096                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1097                                         dst_hold(&rt1->u.dst);
1098                                         dst_release(&rt->u.dst);
1099                                         read_unlock(&rt6_lock);
1100                                         rt = rt1;
1101                                         goto source_ok;
1102                                 }
1103                         }
1104                         read_unlock(&rt6_lock);
1105                 }
1106                 if (net_ratelimit())
1107                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1108                                "for redirect target\n");
1109                 goto out;
1110         }
1111
1112 source_ok:
1113
1114         /*
1115          *      We have finally decided to accept it.
1116          */
1117
1118         neigh_update(neigh, lladdr, NUD_STALE, 
1119                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1120                      NEIGH_UPDATE_F_OVERRIDE|
1121                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1122                                      NEIGH_UPDATE_F_ISROUTER))
1123                      );
1124
1125         /*
1126          * Redirect received -> path was valid.
1127          * Look, redirects are sent only in response to data packets,
1128          * so that this nexthop apparently is reachable. --ANK
1129          */
1130         dst_confirm(&rt->u.dst);
1131
1132         /* Duplicate redirect: silently ignore. */
1133         if (neigh == rt->u.dst.neighbour)
1134                 goto out;
1135
1136         nrt = ip6_rt_copy(rt);
1137         if (nrt == NULL)
1138                 goto out;
1139
1140         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1141         if (on_link)
1142                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1143
1144         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1145         nrt->rt6i_dst.plen = 128;
1146         nrt->u.dst.flags |= DST_HOST;
1147
1148         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1149         nrt->rt6i_nexthop = neigh_clone(neigh);
1150         /* Reset pmtu, it may be better */
1151         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1152         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1153
1154         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1155                 goto out;
1156
1157         if (rt->rt6i_flags&RTF_CACHE) {
1158                 ip6_del_rt(rt, NULL, NULL, NULL);
1159                 return;
1160         }
1161
1162 out:
1163         dst_release(&rt->u.dst);
1164         return;
1165 }
1166
1167 /*
1168  *      Handle ICMP "packet too big" messages
1169  *      i.e. Path MTU discovery
1170  */
1171
1172 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1173                         struct net_device *dev, u32 pmtu)
1174 {
1175         struct rt6_info *rt, *nrt;
1176         int allfrag = 0;
1177
1178         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1179         if (rt == NULL)
1180                 return;
1181
1182         if (pmtu >= dst_mtu(&rt->u.dst))
1183                 goto out;
1184
1185         if (pmtu < IPV6_MIN_MTU) {
1186                 /*
1187                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1188                  * MTU (1280) and a fragment header should always be included
1189                  * after a node receiving Too Big message reporting PMTU is
1190                  * less than the IPv6 Minimum Link MTU.
1191                  */
1192                 pmtu = IPV6_MIN_MTU;
1193                 allfrag = 1;
1194         }
1195
1196         /* New mtu received -> path was valid.
1197            They are sent only in response to data packets,
1198            so that this nexthop apparently is reachable. --ANK
1199          */
1200         dst_confirm(&rt->u.dst);
1201
1202         /* Host route. If it is static, it would be better
1203            not to override it, but add new one, so that
1204            when cache entry will expire old pmtu
1205            would return automatically.
1206          */
1207         if (rt->rt6i_flags & RTF_CACHE) {
1208                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1209                 if (allfrag)
1210                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1211                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1212                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1213                 goto out;
1214         }
1215
1216         /* Network route.
1217            Two cases are possible:
1218            1. It is connected route. Action: COW
1219            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1220          */
1221         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1222                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1223         else
1224                 nrt = rt6_alloc_clone(rt, daddr);
1225
1226         if (nrt) {
1227                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1228                 if (allfrag)
1229                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1230
1231                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1232                  * happened within 5 mins, the recommended timer is 10 mins.
1233                  * Here this route expiration time is set to ip6_rt_mtu_expires
1234                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1235                  * and detecting PMTU increase will be automatically happened.
1236                  */
1237                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1239
1240                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1241         }
1242 out:
1243         dst_release(&rt->u.dst);
1244 }
1245
1246 /*
1247  *      Misc support functions
1248  */
1249
1250 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1251 {
1252         struct rt6_info *rt = ip6_dst_alloc();
1253
1254         if (rt) {
1255                 rt->u.dst.input = ort->u.dst.input;
1256                 rt->u.dst.output = ort->u.dst.output;
1257
1258                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1259                 rt->u.dst.dev = ort->u.dst.dev;
1260                 if (rt->u.dst.dev)
1261                         dev_hold(rt->u.dst.dev);
1262                 rt->rt6i_idev = ort->rt6i_idev;
1263                 if (rt->rt6i_idev)
1264                         in6_dev_hold(rt->rt6i_idev);
1265                 rt->u.dst.lastuse = jiffies;
1266                 rt->rt6i_expires = 0;
1267
1268                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1269                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1270                 rt->rt6i_metric = 0;
1271
1272                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1273 #ifdef CONFIG_IPV6_SUBTREES
1274                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1275 #endif
1276         }
1277         return rt;
1278 }
1279
1280 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1281 {       
1282         struct rt6_info *rt;
1283         struct fib6_node *fn;
1284
1285         fn = &ip6_routing_table;
1286
1287         write_lock_bh(&rt6_lock);
1288         for (rt = fn->leaf; rt; rt=rt->u.next) {
1289                 if (dev == rt->rt6i_dev &&
1290                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1291                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1292                         break;
1293         }
1294         if (rt)
1295                 dst_hold(&rt->u.dst);
1296         write_unlock_bh(&rt6_lock);
1297         return rt;
1298 }
1299
1300 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1301                                      struct net_device *dev,
1302                                      unsigned int pref)
1303 {
1304         struct in6_rtmsg rtmsg;
1305
1306         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309         rtmsg.rtmsg_metric = 1024;
1310         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1311                             RTF_PREF(pref);
1312
1313         rtmsg.rtmsg_ifindex = dev->ifindex;
1314
1315         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1316         return rt6_get_dflt_router(gwaddr, dev);
1317 }
1318
1319 void rt6_purge_dflt_routers(void)
1320 {
1321         struct rt6_info *rt;
1322
1323 restart:
1324         read_lock_bh(&rt6_lock);
1325         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1326                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327                         dst_hold(&rt->u.dst);
1328
1329                         read_unlock_bh(&rt6_lock);
1330
1331                         ip6_del_rt(rt, NULL, NULL, NULL);
1332
1333                         goto restart;
1334                 }
1335         }
1336         read_unlock_bh(&rt6_lock);
1337 }
1338
1339 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1340 {
1341         struct in6_rtmsg rtmsg;
1342         int err;
1343
1344         switch(cmd) {
1345         case SIOCADDRT:         /* Add a route */
1346         case SIOCDELRT:         /* Delete a route */
1347                 if (!capable(CAP_NET_ADMIN))
1348                         return -EPERM;
1349                 err = copy_from_user(&rtmsg, arg,
1350                                      sizeof(struct in6_rtmsg));
1351                 if (err)
1352                         return -EFAULT;
1353                         
1354                 rtnl_lock();
1355                 switch (cmd) {
1356                 case SIOCADDRT:
1357                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1358                         break;
1359                 case SIOCDELRT:
1360                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1361                         break;
1362                 default:
1363                         err = -EINVAL;
1364                 }
1365                 rtnl_unlock();
1366
1367                 return err;
1368         };
1369
1370         return -EINVAL;
1371 }
1372
1373 /*
1374  *      Drop the packet on the floor
1375  */
1376
1377 static int ip6_pkt_discard(struct sk_buff *skb)
1378 {
1379         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1380         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1381         kfree_skb(skb);
1382         return 0;
1383 }
1384
1385 static int ip6_pkt_discard_out(struct sk_buff *skb)
1386 {
1387         skb->dev = skb->dst->dev;
1388         return ip6_pkt_discard(skb);
1389 }
1390
1391 /*
1392  *      Allocate a dst for local (unicast / anycast) address.
1393  */
1394
1395 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1396                                     const struct in6_addr *addr,
1397                                     int anycast)
1398 {
1399         struct rt6_info *rt = ip6_dst_alloc();
1400
1401         if (rt == NULL)
1402                 return ERR_PTR(-ENOMEM);
1403
1404         dev_hold(&loopback_dev);
1405         in6_dev_hold(idev);
1406
1407         rt->u.dst.flags = DST_HOST;
1408         rt->u.dst.input = ip6_input;
1409         rt->u.dst.output = ip6_output;
1410         rt->rt6i_dev = &loopback_dev;
1411         rt->rt6i_idev = idev;
1412         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1413         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1414         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1415         rt->u.dst.obsolete = -1;
1416
1417         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1418         if (anycast)
1419                 rt->rt6i_flags |= RTF_ANYCAST;
1420         else
1421                 rt->rt6i_flags |= RTF_LOCAL;
1422         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1423         if (rt->rt6i_nexthop == NULL) {
1424                 dst_free((struct dst_entry *) rt);
1425                 return ERR_PTR(-ENOMEM);
1426         }
1427
1428         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1429         rt->rt6i_dst.plen = 128;
1430
1431         atomic_set(&rt->u.dst.__refcnt, 1);
1432
1433         return rt;
1434 }
1435
1436 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1437 {
1438         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1439             rt != &ip6_null_entry) {
1440                 RT6_TRACE("deleted by ifdown %p\n", rt);
1441                 return -1;
1442         }
1443         return 0;
1444 }
1445
1446 void rt6_ifdown(struct net_device *dev)
1447 {
1448         write_lock_bh(&rt6_lock);
1449         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1450         write_unlock_bh(&rt6_lock);
1451 }
1452
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1458
1459 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1460 {
1461         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1462         struct inet6_dev *idev;
1463
1464         /* In IPv6 pmtu discovery is not optional,
1465            so that RTAX_MTU lock cannot disable it.
1466            We still use this lock to block changes
1467            caused by addrconf/ndisc.
1468         */
1469
1470         idev = __in6_dev_get(arg->dev);
1471         if (idev == NULL)
1472                 return 0;
1473
1474         /* For administrative MTU increase, there is no way to discover
1475            IPv6 PMTU increase, so PMTU increase should be updated here.
1476            Since RFC 1981 doesn't include administrative MTU increase
1477            update PMTU increase is a MUST. (i.e. jumbo frame)
1478          */
1479         /*
1480            If new MTU is less than route PMTU, this new MTU will be the
1481            lowest MTU in the path, update the route PMTU to reflect PMTU
1482            decreases; if new MTU is greater than route PMTU, and the
1483            old MTU is the lowest MTU in the path, update the route PMTU
1484            to reflect the increase. In this case if the other nodes' MTU
1485            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1486            PMTU discouvery.
1487          */
1488         if (rt->rt6i_dev == arg->dev &&
1489             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1490             (dst_mtu(&rt->u.dst) > arg->mtu ||
1491              (dst_mtu(&rt->u.dst) < arg->mtu &&
1492               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1493                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1494         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1495         return 0;
1496 }
1497
1498 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1499 {
1500         struct rt6_mtu_change_arg arg;
1501
1502         arg.dev = dev;
1503         arg.mtu = mtu;
1504         read_lock_bh(&rt6_lock);
1505         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1506         read_unlock_bh(&rt6_lock);
1507 }
1508
1509 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1510                               struct in6_rtmsg *rtmsg)
1511 {
1512         memset(rtmsg, 0, sizeof(*rtmsg));
1513
1514         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1515         rtmsg->rtmsg_src_len = r->rtm_src_len;
1516         rtmsg->rtmsg_flags = RTF_UP;
1517         if (r->rtm_type == RTN_UNREACHABLE)
1518                 rtmsg->rtmsg_flags |= RTF_REJECT;
1519
1520         if (rta[RTA_GATEWAY-1]) {
1521                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1522                         return -EINVAL;
1523                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1524                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1525         }
1526         if (rta[RTA_DST-1]) {
1527                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1528                         return -EINVAL;
1529                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1530         }
1531         if (rta[RTA_SRC-1]) {
1532                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1533                         return -EINVAL;
1534                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1535         }
1536         if (rta[RTA_OIF-1]) {
1537                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1538                         return -EINVAL;
1539                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1540         }
1541         if (rta[RTA_PRIORITY-1]) {
1542                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1543                         return -EINVAL;
1544                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1545         }
1546         return 0;
1547 }
1548
1549 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1550 {
1551         struct rtmsg *r = NLMSG_DATA(nlh);
1552         struct in6_rtmsg rtmsg;
1553
1554         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1555                 return -EINVAL;
1556         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1557 }
1558
1559 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1560 {
1561         struct rtmsg *r = NLMSG_DATA(nlh);
1562         struct in6_rtmsg rtmsg;
1563
1564         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1565                 return -EINVAL;
1566         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1567 }
1568
/* Per-call state handed to rt6_dump_route() through the walker's args. */
struct rt6_rtnl_dump_arg
{
        struct sk_buff *skb;            /* buffer the dump is written into */
        struct netlink_callback *cb;    /* dump callback; gives access to the request */
};
1574
1575 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1576                          struct in6_addr *dst, struct in6_addr *src,
1577                          int iif, int type, u32 pid, u32 seq,
1578                          int prefix, unsigned int flags)
1579 {
1580         struct rtmsg *rtm;
1581         struct nlmsghdr  *nlh;
1582         unsigned char    *b = skb->tail;
1583         struct rta_cacheinfo ci;
1584
1585         if (prefix) {   /* user wants prefix routes only */
1586                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1587                         /* success since this is not a prefix route */
1588                         return 1;
1589                 }
1590         }
1591
1592         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1593         rtm = NLMSG_DATA(nlh);
1594         rtm->rtm_family = AF_INET6;
1595         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1596         rtm->rtm_src_len = rt->rt6i_src.plen;
1597         rtm->rtm_tos = 0;
1598         rtm->rtm_table = RT_TABLE_MAIN;
1599         if (rt->rt6i_flags&RTF_REJECT)
1600                 rtm->rtm_type = RTN_UNREACHABLE;
1601         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1602                 rtm->rtm_type = RTN_LOCAL;
1603         else
1604                 rtm->rtm_type = RTN_UNICAST;
1605         rtm->rtm_flags = 0;
1606         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1607         rtm->rtm_protocol = rt->rt6i_protocol;
1608         if (rt->rt6i_flags&RTF_DYNAMIC)
1609                 rtm->rtm_protocol = RTPROT_REDIRECT;
1610         else if (rt->rt6i_flags & RTF_ADDRCONF)
1611                 rtm->rtm_protocol = RTPROT_KERNEL;
1612         else if (rt->rt6i_flags&RTF_DEFAULT)
1613                 rtm->rtm_protocol = RTPROT_RA;
1614
1615         if (rt->rt6i_flags&RTF_CACHE)
1616                 rtm->rtm_flags |= RTM_F_CLONED;
1617
1618         if (dst) {
1619                 RTA_PUT(skb, RTA_DST, 16, dst);
1620                 rtm->rtm_dst_len = 128;
1621         } else if (rtm->rtm_dst_len)
1622                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1623 #ifdef CONFIG_IPV6_SUBTREES
1624         if (src) {
1625                 RTA_PUT(skb, RTA_SRC, 16, src);
1626                 rtm->rtm_src_len = 128;
1627         } else if (rtm->rtm_src_len)
1628                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1629 #endif
1630         if (iif)
1631                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1632         else if (dst) {
1633                 struct in6_addr saddr_buf;
1634                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1635                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1636         }
1637         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1638                 goto rtattr_failure;
1639         if (rt->u.dst.neighbour)
1640                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1641         if (rt->u.dst.dev)
1642                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1643         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1644         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1645         if (rt->rt6i_expires)
1646                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1647         else
1648                 ci.rta_expires = 0;
1649         ci.rta_used = rt->u.dst.__use;
1650         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1651         ci.rta_error = rt->u.dst.error;
1652         ci.rta_id = 0;
1653         ci.rta_ts = 0;
1654         ci.rta_tsage = 0;
1655         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1656         nlh->nlmsg_len = skb->tail - b;
1657         return skb->len;
1658
1659 nlmsg_failure:
1660 rtattr_failure:
1661         skb_trim(skb, b - skb->data);
1662         return -1;
1663 }
1664
1665 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1666 {
1667         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1668         int prefix;
1669
1670         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1671                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1672                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1673         } else
1674                 prefix = 0;
1675
1676         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1677                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1678                      prefix, NLM_F_MULTI);
1679 }
1680
1681 static int fib6_dump_node(struct fib6_walker_t *w)
1682 {
1683         int res;
1684         struct rt6_info *rt;
1685
1686         for (rt = w->leaf; rt; rt = rt->u.next) {
1687                 res = rt6_dump_route(rt, w->args);
1688                 if (res < 0) {
1689                         /* Frame is full, suspend walking */
1690                         w->leaf = rt;
1691                         return 1;
1692                 }
1693                 BUG_TRAP(res!=0);
1694         }
1695         w->leaf = NULL;
1696         return 0;
1697 }
1698
1699 static void fib6_dump_end(struct netlink_callback *cb)
1700 {
1701         struct fib6_walker_t *w = (void*)cb->args[0];
1702
1703         if (w) {
1704                 cb->args[0] = 0;
1705                 fib6_walker_unlink(w);
1706                 kfree(w);
1707         }
1708         cb->done = (void*)cb->args[1];
1709         cb->args[1] = 0;
1710 }
1711
1712 static int fib6_dump_done(struct netlink_callback *cb)
1713 {
1714         fib6_dump_end(cb);
1715         return cb->done ? cb->done(cb) : 0;
1716 }
1717
1718 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1719 {
1720         struct rt6_rtnl_dump_arg arg;
1721         struct fib6_walker_t *w;
1722         int res;
1723
1724         arg.skb = skb;
1725         arg.cb = cb;
1726
1727         w = (void*)cb->args[0];
1728         if (w == NULL) {
1729                 /* New dump:
1730                  * 
1731                  * 1. hook callback destructor.
1732                  */
1733                 cb->args[1] = (long)cb->done;
1734                 cb->done = fib6_dump_done;
1735
1736                 /*
1737                  * 2. allocate and initialize walker.
1738                  */
1739                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1740                 if (w == NULL)
1741                         return -ENOMEM;
1742                 RT6_TRACE("dump<%p", w);
1743                 memset(w, 0, sizeof(*w));
1744                 w->root = &ip6_routing_table;
1745                 w->func = fib6_dump_node;
1746                 w->args = &arg;
1747                 cb->args[0] = (long)w;
1748                 read_lock_bh(&rt6_lock);
1749                 res = fib6_walk(w);
1750                 read_unlock_bh(&rt6_lock);
1751         } else {
1752                 w->args = &arg;
1753                 read_lock_bh(&rt6_lock);
1754                 res = fib6_walk_continue(w);
1755                 read_unlock_bh(&rt6_lock);
1756         }
1757 #if RT6_DEBUG >= 3
1758         if (res <= 0 && skb->len == 0)
1759                 RT6_TRACE("%p>dump end\n", w);
1760 #endif
1761         res = res < 0 ? res : skb->len;
1762         /* res < 0 is an error. (really, impossible)
1763            res == 0 means that dump is complete, but skb still can contain data.
1764            res > 0 dump is not complete, but frame is full.
1765          */
1766         /* Destroy walker, if dump of this table is complete. */
1767         if (res <= 0)
1768                 fib6_dump_end(cb);
1769         return res;
1770 }
1771
1772 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1773 {
1774         struct rtattr **rta = arg;
1775         int iif = 0;
1776         int err = -ENOBUFS;
1777         struct sk_buff *skb;
1778         struct flowi fl;
1779         struct rt6_info *rt;
1780
1781         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1782         if (skb == NULL)
1783                 goto out;
1784
1785         /* Reserve room for dummy headers, this skb can pass
1786            through good chunk of routing engine.
1787          */
1788         skb->mac.raw = skb->data;
1789         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1790
1791         memset(&fl, 0, sizeof(fl));
1792         if (rta[RTA_SRC-1])
1793                 ipv6_addr_copy(&fl.fl6_src,
1794                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1795         if (rta[RTA_DST-1])
1796                 ipv6_addr_copy(&fl.fl6_dst,
1797                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1798
1799         if (rta[RTA_IIF-1])
1800                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1801
1802         if (iif) {
1803                 struct net_device *dev;
1804                 dev = __dev_get_by_index(iif);
1805                 if (!dev) {
1806                         err = -ENODEV;
1807                         goto out_free;
1808                 }
1809         }
1810
1811         fl.oif = 0;
1812         if (rta[RTA_OIF-1])
1813                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1814
1815         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1816
1817         skb->dst = &rt->u.dst;
1818
1819         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1820         err = rt6_fill_node(skb, rt, 
1821                             &fl.fl6_dst, &fl.fl6_src,
1822                             iif,
1823                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1824                             nlh->nlmsg_seq, 0, 0);
1825         if (err < 0) {
1826                 err = -EMSGSIZE;
1827                 goto out_free;
1828         }
1829
1830         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1831         if (err > 0)
1832                 err = 0;
1833 out:
1834         return err;
1835 out_free:
1836         kfree_skb(skb);
1837         goto out;       
1838 }
1839
1840 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1841                         struct netlink_skb_parms *req)
1842 {
1843         struct sk_buff *skb;
1844         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845         u32 pid = current->pid;
1846         u32 seq = 0;
1847
1848         if (req)
1849                 pid = req->pid;
1850         if (nlh)
1851                 seq = nlh->nlmsg_seq;
1852         
1853         skb = alloc_skb(size, gfp_any());
1854         if (!skb) {
1855                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1856                 return;
1857         }
1858         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1859                 kfree_skb(skb);
1860                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1861                 return;
1862         }
1863         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1864         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1865 }
1866
1867 /*
1868  *      /proc
1869  */
1870
1871 #ifdef CONFIG_PROC_FS
1872
/*
 * Nominal length in bytes of one line emitted by rt6_info_route():
 * three 32-digit hex addresses, two " %02x " prefix-length fields,
 * four " %08x" fields, a " %8s" device name and the trailing newline.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer being filled */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1883
1884 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1885 {
1886         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1887         int i;
1888
1889         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1890                 arg->skip++;
1891                 return 0;
1892         }
1893
1894         if (arg->len >= arg->length)
1895                 return 0;
1896
1897         for (i=0; i<16; i++) {
1898                 sprintf(arg->buffer + arg->len, "%02x",
1899                         rt->rt6i_dst.addr.s6_addr[i]);
1900                 arg->len += 2;
1901         }
1902         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1903                             rt->rt6i_dst.plen);
1904
1905 #ifdef CONFIG_IPV6_SUBTREES
1906         for (i=0; i<16; i++) {
1907                 sprintf(arg->buffer + arg->len, "%02x",
1908                         rt->rt6i_src.addr.s6_addr[i]);
1909                 arg->len += 2;
1910         }
1911         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1912                             rt->rt6i_src.plen);
1913 #else
1914         sprintf(arg->buffer + arg->len,
1915                 "00000000000000000000000000000000 00 ");
1916         arg->len += 36;
1917 #endif
1918
1919         if (rt->rt6i_nexthop) {
1920                 for (i=0; i<16; i++) {
1921                         sprintf(arg->buffer + arg->len, "%02x",
1922                                 rt->rt6i_nexthop->primary_key[i]);
1923                         arg->len += 2;
1924                 }
1925         } else {
1926                 sprintf(arg->buffer + arg->len,
1927                         "00000000000000000000000000000000");
1928                 arg->len += 32;
1929         }
1930         arg->len += sprintf(arg->buffer + arg->len,
1931                             " %08x %08x %08x %08x %8s\n",
1932                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1933                             rt->u.dst.__use, rt->rt6i_flags, 
1934                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1935         return 0;
1936 }
1937
1938 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1939 {
1940         struct rt6_proc_arg arg;
1941         arg.buffer = buffer;
1942         arg.offset = offset;
1943         arg.length = length;
1944         arg.skip = 0;
1945         arg.len = 0;
1946
1947         read_lock_bh(&rt6_lock);
1948         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1949         read_unlock_bh(&rt6_lock);
1950
1951         *start = buffer;
1952         if (offset)
1953                 *start += offset % RT6_INFO_LEN;
1954
1955         arg.len -= offset % RT6_INFO_LEN;
1956
1957         if (arg.len > length)
1958                 arg.len = length;
1959         if (arg.len < 0)
1960                 arg.len = 0;
1961
1962         return arg.len;
1963 }
1964
1965 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1966 {
1967         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1968                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1969                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1970                       rt6_stats.fib_rt_cache,
1971                       atomic_read(&ip6_dst_ops.entries),
1972                       rt6_stats.fib_discarded_routes);
1973
1974         return 0;
1975 }
1976
1977 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1978 {
1979         return single_open(file, rt6_stats_seq_show, NULL);
1980 }
1981
1982 static struct file_operations rt6_stats_seq_fops = {
1983         .owner   = THIS_MODULE,
1984         .open    = rt6_stats_seq_open,
1985         .read    = seq_read,
1986         .llseek  = seq_lseek,
1987         .release = single_release,
1988 };
1989 #endif  /* CONFIG_PROC_FS */
1990
1991 #ifdef CONFIG_SYSCTL
1992
1993 static int flush_delay;
1994
1995 static
1996 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1997                               void __user *buffer, size_t *lenp, loff_t *ppos)
1998 {
1999         if (write) {
2000                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2001                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2002                 return 0;
2003         } else
2004                 return -EINVAL;
2005 }
2006
2007 ctl_table ipv6_route_table[] = {
2008         {
2009                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2010                 .procname       =       "flush",
2011                 .data           =       &flush_delay,
2012                 .maxlen         =       sizeof(int),
2013                 .mode           =       0200,
2014                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2015         },
2016         {
2017                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2018                 .procname       =       "gc_thresh",
2019                 .data           =       &ip6_dst_ops.gc_thresh,
2020                 .maxlen         =       sizeof(int),
2021                 .mode           =       0644,
2022                 .proc_handler   =       &proc_dointvec,
2023         },
2024         {
2025                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2026                 .procname       =       "max_size",
2027                 .data           =       &ip6_rt_max_size,
2028                 .maxlen         =       sizeof(int),
2029                 .mode           =       0644,
2030                 .proc_handler   =       &proc_dointvec,
2031         },
2032         {
2033                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2034                 .procname       =       "gc_min_interval",
2035                 .data           =       &ip6_rt_gc_min_interval,
2036                 .maxlen         =       sizeof(int),
2037                 .mode           =       0644,
2038                 .proc_handler   =       &proc_dointvec_jiffies,
2039                 .strategy       =       &sysctl_jiffies,
2040         },
2041         {
2042                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2043                 .procname       =       "gc_timeout",
2044                 .data           =       &ip6_rt_gc_timeout,
2045                 .maxlen         =       sizeof(int),
2046                 .mode           =       0644,
2047                 .proc_handler   =       &proc_dointvec_jiffies,
2048                 .strategy       =       &sysctl_jiffies,
2049         },
2050         {
2051                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2052                 .procname       =       "gc_interval",
2053                 .data           =       &ip6_rt_gc_interval,
2054                 .maxlen         =       sizeof(int),
2055                 .mode           =       0644,
2056                 .proc_handler   =       &proc_dointvec_jiffies,
2057                 .strategy       =       &sysctl_jiffies,
2058         },
2059         {
2060                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2061                 .procname       =       "gc_elasticity",
2062                 .data           =       &ip6_rt_gc_elasticity,
2063                 .maxlen         =       sizeof(int),
2064                 .mode           =       0644,
2065                 .proc_handler   =       &proc_dointvec_jiffies,
2066                 .strategy       =       &sysctl_jiffies,
2067         },
2068         {
2069                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2070                 .procname       =       "mtu_expires",
2071                 .data           =       &ip6_rt_mtu_expires,
2072                 .maxlen         =       sizeof(int),
2073                 .mode           =       0644,
2074                 .proc_handler   =       &proc_dointvec_jiffies,
2075                 .strategy       =       &sysctl_jiffies,
2076         },
2077         {
2078                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2079                 .procname       =       "min_adv_mss",
2080                 .data           =       &ip6_rt_min_advmss,
2081                 .maxlen         =       sizeof(int),
2082                 .mode           =       0644,
2083                 .proc_handler   =       &proc_dointvec_jiffies,
2084                 .strategy       =       &sysctl_jiffies,
2085         },
2086         {
2087                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2088                 .procname       =       "gc_min_interval_ms",
2089                 .data           =       &ip6_rt_gc_min_interval,
2090                 .maxlen         =       sizeof(int),
2091                 .mode           =       0644,
2092                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2093                 .strategy       =       &sysctl_ms_jiffies,
2094         },
2095         { .ctl_name = 0 }
2096 };
2097
2098 #endif
2099
2100 void __init ip6_route_init(void)
2101 {
2102         struct proc_dir_entry *p;
2103
2104         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2105                                                      sizeof(struct rt6_info),
2106                                                      0, SLAB_HWCACHE_ALIGN,
2107                                                      NULL, NULL);
2108         if (!ip6_dst_ops.kmem_cachep)
2109                 panic("cannot create ip6_dst_cache");
2110
2111         fib6_init();
2112 #ifdef  CONFIG_PROC_FS
2113         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2114         if (p)
2115                 p->owner = THIS_MODULE;
2116
2117         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2118 #endif
2119 #ifdef CONFIG_XFRM
2120         xfrm6_init();
2121 #endif
2122 }
2123
2124 void ip6_route_cleanup(void)
2125 {
2126 #ifdef CONFIG_PROC_FS
2127         proc_net_remove("ipv6_route");
2128         proc_net_remove("rt6_stats");
2129 #endif
2130 #ifdef CONFIG_XFRM
2131         xfrm6_fini();
2132 #endif
2133         rt6_ifdown(NULL);
2134         fib6_gc_cleanup();
2135         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2136 }