]> err.no Git - linux-2.6/blob - net/ipv6/route.c
[IPV6]: ROUTE: Add rt6_alloc_clone() for cloning route allocation.
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing:
 * RDBG() and RT6_TRACE() then expand to printk() calls (RT6_TRACE at
 * KERN_DEBUG level).  At any lower level both expand to nothing.
 * RDBG() takes its whole argument list in one extra pair of
 * parentheses, since it expands to "printk x".
 */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75
/*
 * Tunables for route garbage collection and MSS/MTU handling.
 * The interval/timeout values are given in jiffies (multiples of HZ).
 */
/* Entry count above which ip6_dst_gc() ignores its rate limit and returns nonzero. */
76 static int ip6_rt_max_size = 4096;
/* Minimum spacing between two GC runs started from ip6_dst_gc(). */
77 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once entries drop below gc_thresh. */
78 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static, so visible to other files; not referenced in this part of the file. */
79 int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry on every call. */
80 static int ip6_rt_gc_elasticity = 9;
/* NOTE(review): presumably the lifetime of PMTU-learned values; not referenced in this part of the file - confirm. */
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound that ipv6_advmss() applies to the advertised MSS. */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
/*
 * Forward declarations.  ip6_rt_copy() is used by the route-cloning
 * helpers; ip6_pkt_discard() and ip6_pkt_discard_out() are the
 * input/output handlers of ip6_null_entry; the remaining functions are
 * installed as callbacks in ip6_dst_ops below.
 */
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void             ip6_dst_destroy(struct dst_entry *);
88 static void             ip6_dst_ifdown(struct dst_entry *,
89                                        struct net_device *dev, int how);
90 static int               ip6_dst_gc(void);
91
92 static int              ip6_pkt_discard(struct sk_buff *skb);
93 static int              ip6_pkt_discard_out(struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
/*
 * dst_ops descriptor for IPv6 routes.  ip6_dst_alloc() passes it to
 * dst_alloc(), so every entry is sizeof(struct rt6_info) bytes, and it
 * registers this file's garbage-collection, validation, teardown and
 * PMTU callbacks.  The entry counter (ip6_dst_ops.entries) and
 * gc_thresh are consulted by ip6_dst_gc().
 */
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .entry_size             =       sizeof(struct rt6_info),
109 };
110
/*
 * Statically allocated "no route" entry.  It is a reject route
 * (RTF_REJECT | RTF_NONEXTHOP) bound to the loopback device whose
 * input/output handlers discard the packet and whose error is
 * -ENETUNREACH.  It is the initial leaf of ip6_routing_table and is
 * what rt6_device_match(), rt6_best_dflt() and rt6_cow() hand back
 * when they have nothing better.  Reference counts start at 1 and the
 * metric is the largest possible u32 value.
 */
111 struct rt6_info ip6_null_entry = {
112         .u = {
113                 .dst = {
114                         .__refcnt       = ATOMIC_INIT(1),
115                         .__use          = 1,
116                         .dev            = &loopback_dev,
117                         .obsolete       = -1,
118                         .error          = -ENETUNREACH,
119                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
120                         .input          = ip6_pkt_discard,
121                         .output         = ip6_pkt_discard_out,
122                         .ops            = &ip6_dst_ops,
                        /* the entry is its own path */
123                         .path           = (struct dst_entry*)&ip6_null_entry,
124                 }
125         },
126         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
127         .rt6i_metric    = ~(u32) 0,
128         .rt6i_ref       = ATOMIC_INIT(1),
129 };
130
/*
 * Root node of the IPv6 routing tree searched by fib6_lookup() and
 * extended by fib6_add().  It starts out with ip6_null_entry as its
 * only leaf and is flagged as root (RTN_ROOT | RTN_TL_ROOT) carrying
 * route information (RTN_RTINFO).
 */
131 struct fib6_node ip6_routing_table = {
132         .leaf           = &ip6_null_entry,
133         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 };
135
136 /*
 * Protects all the ip6 fib.  In this file the lookup paths take it
 * with read_lock_bh() and ip6_ins_rt() takes it with write_lock_bh();
 * ndisc_dst_gc_list is protected by it as well.
 */
137
138 DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
179 /*
180  *      Route lookup.  Callers are expected to hold rt6_lock (read or write).
181  */
182
/*
 * Pick, from the route list starting at @rt (chained through u.next),
 * the entry that fits outgoing interface @oif.
 *
 * With @oif == 0 the head @rt is returned untouched.  Otherwise the
 * first route whose device has ifindex @oif wins immediately.  Routes
 * on a loopback device are remembered in `local` as a fallback.  If
 * nothing matched, a strict lookup yields &ip6_null_entry and a
 * non-strict one falls back to the head @rt.
 *
 * No reference is taken on the returned route.
 */
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
                        /* exact device match: done */
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
                                /*
                                 * Loopback route whose inet6_dev is missing or
                                 * belongs to another interface: never accepted
                                 * in strict mode, and never allowed to replace
                                 * a fallback whose inet6_dev already matches oif.
                                 */
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
217 /*
218  *      Pointer to the last default router chosen by rt6_best_dflt();
 *      cleared by rt6_reset_dflt_pointer().  Updates are made under
 *      rt6_dflt_lock.  BH is disabled locally: rt6_reset_dflt_pointer()
 *      uses spin_lock_bh(), while rt6_best_dflt() takes the plain
 *      spin_lock() (its caller in this file holds rt6_lock through
 *      read_lock_bh()).
219  */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
233 /*
 * Default Router Selection (RFC 2461 6.3.6).
 *
 * Walk the default-route list starting at @rt and score every entry
 * that is not expired and has a usable nexthop neighbour:
 *   +8  the route's device matches @oif (or @oif is 0),
 *   +4  it is the router chosen last time (rt6_dflt_pointer),
 *   +3  neighbour is REACHABLE, +2 STALE/DELAY/PROBE, +1 NOARP/PERMANENT.
 * Entries without a neighbour, or with a neighbour in any other state,
 * are skipped.  A score >= 12 (interface match plus previous choice)
 * ends the search at once.
 *
 * If nothing scored, the list is round-robined starting after the
 * previous choice; if that fails too, the first unexpired RTF_DEFAULT
 * route among the leaves of ip6_routing_table that fits @oif is used,
 * and finally &ip6_null_entry.  A match from the first two stages is
 * recorded in rt6_dflt_pointer; the last-resort route is not.
 *
 * No reference is taken on the returned route.
 */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 {
236         struct rt6_info *match = NULL;
237         struct rt6_info *sprt;
238         int mpri = 0;
239
240         for (sprt = rt; sprt; sprt = sprt->u.next) {
241                 struct neighbour *neigh;
242                 int m = 0;
243
244                 if (!oif ||
245                     (sprt->rt6i_dev &&
246                      sprt->rt6i_dev->ifindex == oif))
247                         m += 8;
248
249                 if (rt6_check_expired(sprt))
250                         continue;
251
252                 if (sprt == rt6_dflt_pointer)
253                         m += 4;
254
                /* the neighbour state is sampled under the neighbour's own lock */
255                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256                         read_lock_bh(&neigh->lock);
257                         switch (neigh->nud_state) {
258                         case NUD_REACHABLE:
259                                 m += 3;
260                                 break;
261
262                         case NUD_STALE:
263                         case NUD_DELAY:
264                         case NUD_PROBE:
265                                 m += 2;
266                                 break;
267
268                         case NUD_NOARP:
269                         case NUD_PERMANENT:
270                                 m += 1;
271                                 break;
272
273                         case NUD_INCOMPLETE:
274                         default:
275                                 read_unlock_bh(&neigh->lock);
276                                 continue;
277                         }
278                         read_unlock_bh(&neigh->lock);
279                 } else {
280                         continue;
281                 }
282
283                 if (m > mpri || m >= 12) {
284                         match = sprt;
285                         mpri = m;
286                         if (m >= 12) {
287                                 /* we choose the last default router if it
288                                  * is in (probably) reachable state.
289                                  * If route changed, we should do pmtu
290                                  * discovery. --yoshfuji
291                                  */
292                                 break;
293                         }
294                 }
295         }
296
297         spin_lock(&rt6_dflt_lock);
298         if (!match) {
299                 /*
300                  *      No default routers are known to be reachable.
301                  *      SHOULD round robin
302                  */
303                 if (rt6_dflt_pointer) {
                        /* first try the entries following the previous choice ... */
304                         for (sprt = rt6_dflt_pointer->u.next;
305                              sprt; sprt = sprt->u.next) {
306                                 if (sprt->u.dst.obsolete <= 0 &&
307                                     sprt->u.dst.error == 0 &&
308                                     !rt6_check_expired(sprt)) {
309                                         match = sprt;
310                                         break;
311                                 }
312                         }
                        /* ... then wrap around from the head up to and including it */
313                         for (sprt = rt;
314                              !match && sprt;
315                              sprt = sprt->u.next) {
316                                 if (sprt->u.dst.obsolete <= 0 &&
317                                     sprt->u.dst.error == 0 &&
318                                     !rt6_check_expired(sprt)) {
319                                         match = sprt;
320                                         break;
321                                 }
322                                 if (sprt == rt6_dflt_pointer)
323                                         break;
324                         }
325                 }
326         }
327
328         if (match) {
329                 if (rt6_dflt_pointer != match)
330                         RT6_TRACE("changed default router: %p->%p\n",
331                                   rt6_dflt_pointer, match);
332                 rt6_dflt_pointer = match;
333         }
334         spin_unlock(&rt6_dflt_lock);
335
336         if (!match) {
337                 /*
338                  * Last Resort: if no default routers found,
339                  * use addrconf default route.
340                  * We don't record this route.
341                  */
342                 for (sprt = ip6_routing_table.leaf;
343                      sprt; sprt = sprt->u.next) {
344                         if (!rt6_check_expired(sprt) &&
345                             (sprt->rt6i_flags & RTF_DEFAULT) &&
346                             (!oif ||
347                              (sprt->rt6i_dev &&
348                               sprt->rt6i_dev->ifindex == oif))) {
349                                 match = sprt;
350                                 break;
351                         }
352                 }
353                 if (!match) {
354                         /* no default route.  give up. */
355                         match = &ip6_null_entry;
356                 }
357         }
358
359         return match;
360 }
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401    with dst->error set to errno value.
402  */
403
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405                                       struct in6_addr *saddr)
406 {
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417                         if (rt->rt6i_dst.plen != 128 &&
418                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419                                 rt->rt6i_flags |= RTF_ANYCAST;
420                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421                 }
422
423                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424                 rt->rt6i_dst.plen = 128;
425                 rt->rt6i_flags |= RTF_CACHE;
426                 rt->u.dst.flags |= DST_HOST;
427
428 #ifdef CONFIG_IPV6_SUBTREES
429                 if (rt->rt6i_src.plen && saddr) {
430                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431                         rt->rt6i_src.plen = 128;
432                 }
433 #endif
434
435                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437         }
438
439         return rt;
440 }
441
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
444 {
445         struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446         int err;
447
448         if (!rt) {
449                 dst_hold(&ip6_null_entry.u.dst);
450                 return &ip6_null_entry;
451         }
452
453         dst_hold(&rt->u.dst);
454
455         err = ip6_ins_rt(rt, NULL, NULL, req);
456         if (err)
457                 rt->u.dst.error = err;
458
459         return rt;
460 }
461
462 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
463 {
464         struct rt6_info *rt = ip6_rt_copy(ort);
465         if (rt) {
466                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
467                 rt->rt6i_dst.plen = 128;
468                 rt->rt6i_flags |= RTF_CACHE;
469                 if (rt->rt6i_flags & RTF_REJECT)
470                         rt->u.dst.error = ort->u.dst.error;
471                 rt->u.dst.flags |= DST_HOST;
472                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
473         }
474         return rt;
475 }
476
/*
 * BACKTRACK() - retry a strict lookup at a less specific tree node.
 *
 * Operates on the caller's locals `rt`, `fn` and `strict` and jumps to
 * the caller's labels `restart` and `out`.  When a strict lookup ended
 * on ip6_null_entry, walk fn->parent upwards:
 *  - on reaching a node flagged RTN_ROOT, take a reference on rt
 *    (still ip6_null_entry) and jump to `out`;
 *  - on reaching a node flagged RTN_RTINFO, jump to `restart` so the
 *    caller re-examines that node's leaf.
 * In every other case control simply falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
488
489
/*
 * Resolve the route for a received packet and store it in skb->dst.
 *
 * The lookup is keyed on the packet's destination and source address;
 * candidates are narrowed to the receiving device (skb->dev->ifindex).
 * `strict` is non-zero for multicast and link-local destinations and
 * is what enables BACKTRACK() to climb to less specific nodes.
 *
 * A leaf list headed by an RTF_CACHE entry is used directly.  A route
 * that has no nexthop neighbour and is not RTF_NONEXTHOP is copied per
 * destination with rt6_cow().  That happens with rt6_lock dropped, so
 * if the insertion reports -EEXIST the whole lookup is repeated; at
 * most three rt6_cow() attempts are made, after which the entry
 * returned by the last attempt is used as it is.
 *
 * The route stored in skb->dst carries one reference; its lastuse
 * stamp and use counter are updated.
 */
490 void ip6_route_input(struct sk_buff *skb)
491 {
492         struct fib6_node *fn;
493         struct rt6_info *rt;
494         int strict;
495         int attempts = 3;
496
497         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
498
499 relookup:
500         read_lock_bh(&rt6_lock);
501
502         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
503                          &skb->nh.ipv6h->saddr);
504
505 restart:
506         rt = fn->leaf;
507
        /* cached entry at the head of the leaf list: take it (or backtrack) */
508         if ((rt->rt6i_flags & RTF_CACHE)) {
509                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
510                 BACKTRACK();
511                 dst_hold(&rt->u.dst);
512                 goto out;
513         }
514
515         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
516         BACKTRACK();
517
518         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
519                 struct rt6_info *nrt;
                /* pin the original route while the lock is dropped */
520                 dst_hold(&rt->u.dst);
521                 read_unlock_bh(&rt6_lock);
522
523                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
524                               &skb->nh.ipv6h->saddr,
525                               &NETLINK_CB(skb));
526
527                 dst_release(&rt->u.dst);
528                 rt = nrt;
529
                /* rt6_lock is not held here, hence out2 rather than out */
530                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
531                         goto out2;
532
533                 /* Race condition! In the gap, when rt6_lock was
534                    released someone could insert this route.  Relookup.
535                 */
536                 dst_release(&rt->u.dst);
537                 goto relookup;
538         }
539         dst_hold(&rt->u.dst);
540
541 out:
542         read_unlock_bh(&rt6_lock);
543 out2:
544         rt->u.dst.lastuse = jiffies;
545         rt->u.dst.__use++;
546         skb->dst = (struct dst_entry *) rt;
547 }
548
/*
 * Resolve the route for an outgoing flow @fl and return its dst entry
 * with one reference held.  @sk is not used in this function.
 *
 * The lookup is keyed on fl->fl6_dst / fl->fl6_src and narrowed to
 * fl->oif.  `strict` is non-zero for multicast and link-local
 * destinations and is what enables BACKTRACK().
 *
 * A leaf list headed by an RTF_CACHE entry is used directly.  For a
 * default route (RTF_DEFAULT) with metric >= IP6_RT_PRIO_ADDRCONF the
 * router is chosen by rt6_best_dflt(); a default route with a lower
 * metric is taken as found.  All other routes go through
 * rt6_device_match() and BACKTRACK().
 *
 * A route that has no nexthop neighbour and is not RTF_NONEXTHOP is
 * copied per destination with rt6_cow().  That happens with rt6_lock
 * dropped, so if the insertion reports -EEXIST the whole lookup is
 * repeated; at most three rt6_cow() attempts are made, after which the
 * entry returned by the last attempt is used as it is.
 *
 * Always returns a dst entry (never NULL); its lastuse stamp and use
 * counter are updated.
 */
549 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
550 {
551         struct fib6_node *fn;
552         struct rt6_info *rt;
553         int strict;
554         int attempts = 3;
555
556         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
557
558 relookup:
559         read_lock_bh(&rt6_lock);
560
561         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
562
563 restart:
564         rt = fn->leaf;
565
        /* cached entry at the head of the leaf list: take it (or backtrack) */
566         if ((rt->rt6i_flags & RTF_CACHE)) {
567                 rt = rt6_device_match(rt, fl->oif, strict);
568                 BACKTRACK();
569                 dst_hold(&rt->u.dst);
570                 goto out;
571         }
572         if (rt->rt6i_flags & RTF_DEFAULT) {
573                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
574                         rt = rt6_best_dflt(rt, fl->oif);
575         } else {
576                 rt = rt6_device_match(rt, fl->oif, strict);
577                 BACKTRACK();
578         }
579
580         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
581                 struct rt6_info *nrt;
                /* pin the original route while the lock is dropped */
582                 dst_hold(&rt->u.dst);
583                 read_unlock_bh(&rt6_lock);
584
585                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
586
587                 dst_release(&rt->u.dst);
588                 rt = nrt;
589
                /* rt6_lock is not held here, hence out2 rather than out */
590                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
591                         goto out2;
592
593                 /* Race condition! In the gap, when rt6_lock was
594                    released someone could insert this route.  Relookup.
595                 */
596                 dst_release(&rt->u.dst);
597                 goto relookup;
598         }
599         dst_hold(&rt->u.dst);
600
601 out:
602         read_unlock_bh(&rt6_lock);
603 out2:
604         rt->u.dst.lastuse = jiffies;
605         rt->u.dst.__use++;
606         return &rt->u.dst;
607 }
608
609
610 /*
611  *      Destination cache support functions
612  */
613
614 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
615 {
616         struct rt6_info *rt;
617
618         rt = (struct rt6_info *) dst;
619
620         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
621                 return dst;
622
623         return NULL;
624 }
625
626 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
627 {
628         struct rt6_info *rt = (struct rt6_info *) dst;
629
630         if (rt) {
631                 if (rt->rt6i_flags & RTF_CACHE)
632                         ip6_del_rt(rt, NULL, NULL, NULL);
633                 else
634                         dst_release(dst);
635         }
636         return NULL;
637 }
638
639 static void ip6_link_failure(struct sk_buff *skb)
640 {
641         struct rt6_info *rt;
642
643         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
644
645         rt = (struct rt6_info *) skb->dst;
646         if (rt) {
647                 if (rt->rt6i_flags&RTF_CACHE) {
648                         dst_set_expires(&rt->u.dst, 0);
649                         rt->rt6i_flags |= RTF_EXPIRES;
650                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
651                         rt->rt6i_node->fn_sernum = -1;
652         }
653 }
654
655 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
656 {
657         struct rt6_info *rt6 = (struct rt6_info*)dst;
658
659         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
660                 rt6->rt6i_flags |= RTF_MODIFIED;
661                 if (mtu < IPV6_MIN_MTU) {
662                         mtu = IPV6_MIN_MTU;
663                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
664                 }
665                 dst->metrics[RTAX_MTU-1] = mtu;
666         }
667 }
668
669 /*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the ones that are no longer
 * referenced.  Protected by rt6_lock.
 */
670 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration: ndisc_dst_alloc() uses it before its definition. */
671 static int ipv6_get_mtu(struct net_device *dev);
672
673 static inline unsigned int ipv6_advmss(unsigned int mtu)
674 {
675         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
676
677         if (mtu < ip6_rt_min_advmss)
678                 mtu = ip6_rt_min_advmss;
679
680         /*
681          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
682          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
683          * IPV6_MAXPLEN is also valid and means: "any MSS, 
684          * rely only on pmtu discovery"
685          */
686         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
687                 mtu = IPV6_MAXPLEN;
688         return mtu;
689 }
690
/*
 * Allocate a stand-alone dst entry for neighbour discovery traffic on
 * @dev.  The entry is not inserted into the routing tree; it is put on
 * ndisc_dst_gc_list instead and the fib garbage collector is kicked.
 *
 * @dev:    outgoing device; a reference is taken on it and on its
 *          inet6_dev.
 * @neigh:  nexthop neighbour; if non-NULL an extra reference is taken,
 *          otherwise one is looked up with ndisc_get_neigh(@dev, @addr).
 * @addr:   address used for the neighbour lookup when @neigh is NULL.
 * @output: output handler installed in the dst.
 *
 * The entry starts with a reference count of 1, hop limit 255 and
 * MTU/advmss metrics derived from the device.
 *
 * Returns the new dst, or NULL when @dev has no inet6_dev or the
 * allocation failed.
 */
691 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
692                                   struct neighbour *neigh,
693                                   struct in6_addr *addr,
694                                   int (*output)(struct sk_buff *))
695 {
696         struct rt6_info *rt;
697         struct inet6_dev *idev = in6_dev_get(dev);
698
699         if (unlikely(idev == NULL))
700                 return NULL;
701
702         rt = ip6_dst_alloc();
703         if (unlikely(rt == NULL)) {
                /* undo the inet6_dev reference; rt is NULL, so "out" returns NULL */
704                 in6_dev_put(idev);
705                 goto out;
706         }
707
708         dev_hold(dev);
709         if (neigh)
710                 neigh_hold(neigh);
711         else
712                 neigh = ndisc_get_neigh(dev, addr);
713
714         rt->rt6i_dev      = dev;
715         rt->rt6i_idev     = idev;
716         rt->rt6i_nexthop  = neigh;
717         atomic_set(&rt->u.dst.__refcnt, 1);
718         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
719         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
720         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
721         rt->u.dst.output  = output;
722
723 #if 0   /* there's no chance to use these for ndisc */
724         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
725                                 ? DST_HOST 
726                                 : 0;
727         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
728         rt->rt6i_dst.plen = 128;
729 #endif
730
        /* push onto the head of the ndisc gc list */
731         write_lock_bh(&rt6_lock);
732         rt->u.dst.next = ndisc_dst_gc_list;
733         ndisc_dst_gc_list = &rt->u.dst;
734         write_unlock_bh(&rt6_lock);
735
736         fib6_force_start_gc();
737
738 out:
739         return (struct dst_entry *)rt;
740 }
741
742 int ndisc_dst_gc(int *more)
743 {
744         struct dst_entry *dst, *next, **pprev;
745         int freed;
746
747         next = NULL;
748         pprev = &ndisc_dst_gc_list;
749         freed = 0;
750         while ((dst = *pprev) != NULL) {
751                 if (!atomic_read(&dst->__refcnt)) {
752                         *pprev = dst->next;
753                         dst_free(dst);
754                         freed++;
755                 } else {
756                         pprev = &dst->next;
757                         (*more)++;
758                 }
759         }
760
761         return freed;
762 }
763
/*
 * dst_ops.gc callback.
 *
 * Runs fib6_run_gc() unless a run happened less than
 * ip6_rt_gc_min_interval ago and the number of entries does not exceed
 * ip6_rt_max_size.  The expiry value handed to fib6_run_gc() is kept
 * in a static variable: it is bumped by one before each run, reset to
 * half of ip6_rt_gc_timeout once the entry count is below gc_thresh
 * after a run, and decayed by expire >> ip6_rt_gc_elasticity on every
 * call (whether or not a run took place).
 *
 * Returns non-zero when the table still holds more than
 * ip6_rt_max_size entries.
 */
764 static int ip6_dst_gc(void)
765 {
766         static unsigned expire = 30*HZ;
767         static unsigned long last_gc;
768         unsigned long now = jiffies;
769
        /* rate limit, bypassed while the table is over its size limit */
770         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
771             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
772                 goto out;
773
774         expire++;
775         fib6_run_gc(expire);
776         last_gc = now;
777         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
778                 expire = ip6_rt_gc_timeout>>1;
779
780 out:
781         expire -= expire>>ip6_rt_gc_elasticity;
782         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
783 }
784
785 /* Clean host part of a prefix. Not necessary in radix tree,
786    but results in cleaner routing tables.
787
788    Remove it only when all the things will work!
789  */
790
791 static int ipv6_get_mtu(struct net_device *dev)
792 {
793         int mtu = IPV6_MIN_MTU;
794         struct inet6_dev *idev;
795
796         idev = in6_dev_get(dev);
797         if (idev) {
798                 mtu = idev->cnf.mtu6;
799                 in6_dev_put(idev);
800         }
801         return mtu;
802 }
803
804 int ipv6_get_hoplimit(struct net_device *dev)
805 {
806         int hoplimit = ipv6_devconf.hop_limit;
807         struct inet6_dev *idev;
808
809         idev = in6_dev_get(dev);
810         if (idev) {
811                 hoplimit = idev->cnf.hop_limit;
812                 in6_dev_put(idev);
813         }
814         return hoplimit;
815 }
816
817 /*
818  *
819  */
820
821 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
822                 void *_rtattr, struct netlink_skb_parms *req)
823 {
824         int err;
825         struct rtmsg *r;
826         struct rtattr **rta;
827         struct rt6_info *rt = NULL;
828         struct net_device *dev = NULL;
829         struct inet6_dev *idev = NULL;
830         int addr_type;
831
832         rta = (struct rtattr **) _rtattr;
833
834         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
835                 return -EINVAL;
836 #ifndef CONFIG_IPV6_SUBTREES
837         if (rtmsg->rtmsg_src_len)
838                 return -EINVAL;
839 #endif
840         if (rtmsg->rtmsg_ifindex) {
841                 err = -ENODEV;
842                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
843                 if (!dev)
844                         goto out;
845                 idev = in6_dev_get(dev);
846                 if (!idev)
847                         goto out;
848         }
849
850         if (rtmsg->rtmsg_metric == 0)
851                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
852
853         rt = ip6_dst_alloc();
854
855         if (rt == NULL) {
856                 err = -ENOMEM;
857                 goto out;
858         }
859
860         rt->u.dst.obsolete = -1;
861         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
862         if (nlh && (r = NLMSG_DATA(nlh))) {
863                 rt->rt6i_protocol = r->rtm_protocol;
864         } else {
865                 rt->rt6i_protocol = RTPROT_BOOT;
866         }
867
868         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
869
870         if (addr_type & IPV6_ADDR_MULTICAST)
871                 rt->u.dst.input = ip6_mc_input;
872         else
873                 rt->u.dst.input = ip6_forward;
874
875         rt->u.dst.output = ip6_output;
876
877         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
878                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
879         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
880         if (rt->rt6i_dst.plen == 128)
881                rt->u.dst.flags = DST_HOST;
882
883 #ifdef CONFIG_IPV6_SUBTREES
884         ipv6_addr_prefix(&rt->rt6i_src.addr, 
885                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
886         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
887 #endif
888
889         rt->rt6i_metric = rtmsg->rtmsg_metric;
890
891         /* We cannot add true routes via loopback here,
892            they would result in kernel looping; promote them to reject routes
893          */
894         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
895             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
896                 /* hold loopback dev/idev if we haven't done so. */
897                 if (dev != &loopback_dev) {
898                         if (dev) {
899                                 dev_put(dev);
900                                 in6_dev_put(idev);
901                         }
902                         dev = &loopback_dev;
903                         dev_hold(dev);
904                         idev = in6_dev_get(dev);
905                         if (!idev) {
906                                 err = -ENODEV;
907                                 goto out;
908                         }
909                 }
910                 rt->u.dst.output = ip6_pkt_discard_out;
911                 rt->u.dst.input = ip6_pkt_discard;
912                 rt->u.dst.error = -ENETUNREACH;
913                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
914                 goto install_route;
915         }
916
917         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
918                 struct in6_addr *gw_addr;
919                 int gwa_type;
920
921                 gw_addr = &rtmsg->rtmsg_gateway;
922                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
923                 gwa_type = ipv6_addr_type(gw_addr);
924
925                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
926                         struct rt6_info *grt;
927
928                         /* IPv6 strictly inhibits using not link-local
929                            addresses as nexthop address.
930                            Otherwise, router will not able to send redirects.
931                            It is very good, but in some (rare!) circumstances
932                            (SIT, PtP, NBMA NOARP links) it is handy to allow
933                            some exceptions. --ANK
934                          */
935                         err = -EINVAL;
936                         if (!(gwa_type&IPV6_ADDR_UNICAST))
937                                 goto out;
938
939                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
940
941                         err = -EHOSTUNREACH;
942                         if (grt == NULL)
943                                 goto out;
944                         if (dev) {
945                                 if (dev != grt->rt6i_dev) {
946                                         dst_release(&grt->u.dst);
947                                         goto out;
948                                 }
949                         } else {
950                                 dev = grt->rt6i_dev;
951                                 idev = grt->rt6i_idev;
952                                 dev_hold(dev);
953                                 in6_dev_hold(grt->rt6i_idev);
954                         }
955                         if (!(grt->rt6i_flags&RTF_GATEWAY))
956                                 err = 0;
957                         dst_release(&grt->u.dst);
958
959                         if (err)
960                                 goto out;
961                 }
962                 err = -EINVAL;
963                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
964                         goto out;
965         }
966
967         err = -ENODEV;
968         if (dev == NULL)
969                 goto out;
970
971         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
972                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
973                 if (IS_ERR(rt->rt6i_nexthop)) {
974                         err = PTR_ERR(rt->rt6i_nexthop);
975                         rt->rt6i_nexthop = NULL;
976                         goto out;
977                 }
978         }
979
980         rt->rt6i_flags = rtmsg->rtmsg_flags;
981
982 install_route:
983         if (rta && rta[RTA_METRICS-1]) {
984                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
985                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
986
987                 while (RTA_OK(attr, attrlen)) {
988                         unsigned flavor = attr->rta_type;
989                         if (flavor) {
990                                 if (flavor > RTAX_MAX) {
991                                         err = -EINVAL;
992                                         goto out;
993                                 }
994                                 rt->u.dst.metrics[flavor-1] =
995                                         *(u32 *)RTA_DATA(attr);
996                         }
997                         attr = RTA_NEXT(attr, attrlen);
998                 }
999         }
1000
1001         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1002                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1003         if (!rt->u.dst.metrics[RTAX_MTU-1])
1004                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1005         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1006                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1007         rt->u.dst.dev = dev;
1008         rt->rt6i_idev = idev;
1009         return ip6_ins_rt(rt, nlh, _rtattr, req);
1010
1011 out:
1012         if (dev)
1013                 dev_put(dev);
1014         if (idev)
1015                 in6_dev_put(idev);
1016         if (rt)
1017                 dst_free((struct dst_entry *) rt);
1018         return err;
1019 }
1020
1021 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1022 {
1023         int err;
1024
1025         write_lock_bh(&rt6_lock);
1026
1027         rt6_reset_dflt_pointer(NULL);
1028
1029         err = fib6_del(rt, nlh, _rtattr, req);
1030         dst_release(&rt->u.dst);
1031
1032         write_unlock_bh(&rt6_lock);
1033
1034         return err;
1035 }
1036
1037 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1038 {
1039         struct fib6_node *fn;
1040         struct rt6_info *rt;
1041         int err = -ESRCH;
1042
1043         read_lock_bh(&rt6_lock);
1044
1045         fn = fib6_locate(&ip6_routing_table,
1046                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1047                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1048         
1049         if (fn) {
1050                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1051                         if (rtmsg->rtmsg_ifindex &&
1052                             (rt->rt6i_dev == NULL ||
1053                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1054                                 continue;
1055                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1056                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1057                                 continue;
1058                         if (rtmsg->rtmsg_metric &&
1059                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1060                                 continue;
1061                         dst_hold(&rt->u.dst);
1062                         read_unlock_bh(&rt6_lock);
1063
1064                         return ip6_del_rt(rt, nlh, _rtattr, req);
1065                 }
1066         }
1067         read_unlock_bh(&rt6_lock);
1068
1069         return err;
1070 }
1071
1072 /*
1073  *      Handle redirects
1074  */
1075 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1076                   struct neighbour *neigh, u8 *lladdr, int on_link)
1077 {
1078         struct rt6_info *rt, *nrt;
1079
1080         /* Locate old route to this destination. */
1081         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1082
1083         if (rt == NULL)
1084                 return;
1085
1086         if (neigh->dev != rt->rt6i_dev)
1087                 goto out;
1088
1089         /*
1090          * Current route is on-link; redirect is always invalid.
1091          * 
1092          * Seems, previous statement is not true. It could
1093          * be node, which looks for us as on-link (f.e. proxy ndisc)
1094          * But then router serving it might decide, that we should
1095          * know truth 8)8) --ANK (980726).
1096          */
1097         if (!(rt->rt6i_flags&RTF_GATEWAY))
1098                 goto out;
1099
1100         /*
1101          *      RFC 2461 specifies that redirects should only be
1102          *      accepted if they come from the nexthop to the target.
1103          *      Due to the way default routers are chosen, this notion
1104          *      is a bit fuzzy and one might need to check all default
1105          *      routers.
1106          */
1107         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1108                 if (rt->rt6i_flags & RTF_DEFAULT) {
1109                         struct rt6_info *rt1;
1110
1111                         read_lock(&rt6_lock);
1112                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1113                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1114                                         dst_hold(&rt1->u.dst);
1115                                         dst_release(&rt->u.dst);
1116                                         read_unlock(&rt6_lock);
1117                                         rt = rt1;
1118                                         goto source_ok;
1119                                 }
1120                         }
1121                         read_unlock(&rt6_lock);
1122                 }
1123                 if (net_ratelimit())
1124                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1125                                "for redirect target\n");
1126                 goto out;
1127         }
1128
1129 source_ok:
1130
1131         /*
1132          *      We have finally decided to accept it.
1133          */
1134
1135         neigh_update(neigh, lladdr, NUD_STALE, 
1136                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1137                      NEIGH_UPDATE_F_OVERRIDE|
1138                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1139                                      NEIGH_UPDATE_F_ISROUTER))
1140                      );
1141
1142         /*
1143          * Redirect received -> path was valid.
1144          * Look, redirects are sent only in response to data packets,
1145          * so that this nexthop apparently is reachable. --ANK
1146          */
1147         dst_confirm(&rt->u.dst);
1148
1149         /* Duplicate redirect: silently ignore. */
1150         if (neigh == rt->u.dst.neighbour)
1151                 goto out;
1152
1153         nrt = ip6_rt_copy(rt);
1154         if (nrt == NULL)
1155                 goto out;
1156
1157         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1158         if (on_link)
1159                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1160
1161         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1162         nrt->rt6i_dst.plen = 128;
1163         nrt->u.dst.flags |= DST_HOST;
1164
1165         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1166         nrt->rt6i_nexthop = neigh_clone(neigh);
1167         /* Reset pmtu, it may be better */
1168         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1169         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1170
1171         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1172                 goto out;
1173
1174         if (rt->rt6i_flags&RTF_CACHE) {
1175                 ip6_del_rt(rt, NULL, NULL, NULL);
1176                 return;
1177         }
1178
1179 out:
1180         dst_release(&rt->u.dst);
1181         return;
1182 }
1183
1184 /*
1185  *      Handle ICMP "packet too big" messages
1186  *      i.e. Path MTU discovery
1187  */
1188
1189 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1190                         struct net_device *dev, u32 pmtu)
1191 {
1192         struct rt6_info *rt, *nrt;
1193         int allfrag = 0;
1194
1195         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1196         if (rt == NULL)
1197                 return;
1198
1199         if (pmtu >= dst_mtu(&rt->u.dst))
1200                 goto out;
1201
1202         if (pmtu < IPV6_MIN_MTU) {
1203                 /*
1204                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1205                  * MTU (1280) and a fragment header should always be included
1206                  * after a node receiving Too Big message reporting PMTU is
1207                  * less than the IPv6 Minimum Link MTU.
1208                  */
1209                 pmtu = IPV6_MIN_MTU;
1210                 allfrag = 1;
1211         }
1212
1213         /* New mtu received -> path was valid.
1214            They are sent only in response to data packets,
1215            so that this nexthop apparently is reachable. --ANK
1216          */
1217         dst_confirm(&rt->u.dst);
1218
1219         /* Host route. If it is static, it would be better
1220            not to override it, but add new one, so that
1221            when cache entry will expire old pmtu
1222            would return automatically.
1223          */
1224         if (rt->rt6i_flags & RTF_CACHE) {
1225                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1226                 if (allfrag)
1227                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1228                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1229                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1230                 goto out;
1231         }
1232
1233         /* Network route.
1234            Two cases are possible:
1235            1. It is connected route. Action: COW
1236            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1237          */
1238         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1239                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1240                 if (!nrt)
1241                         goto out;
1242
1243                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1244                 if (allfrag)
1245                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1246
1247                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1248                  * happened within 5 mins, the recommended timer is 10 mins.
1249                  * Here this route expiration time is set to ip6_rt_mtu_expires
1250                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1251                  * and detecting PMTU increase will be automatically happened.
1252                  */
1253                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1254                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1255
1256                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1257         } else {
1258                 nrt = rt6_alloc_clone(rt, daddr);
1259                 if (!nrt)
1260                         goto out;
1261                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1262                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1263                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1264                 if (allfrag)
1265                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1266                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1267         }
1268
1269 out:
1270         dst_release(&rt->u.dst);
1271 }
1272
1273 /*
1274  *      Misc support functions
1275  */
1276
1277 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1278 {
1279         struct rt6_info *rt = ip6_dst_alloc();
1280
1281         if (rt) {
1282                 rt->u.dst.input = ort->u.dst.input;
1283                 rt->u.dst.output = ort->u.dst.output;
1284
1285                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1286                 rt->u.dst.dev = ort->u.dst.dev;
1287                 if (rt->u.dst.dev)
1288                         dev_hold(rt->u.dst.dev);
1289                 rt->rt6i_idev = ort->rt6i_idev;
1290                 if (rt->rt6i_idev)
1291                         in6_dev_hold(rt->rt6i_idev);
1292                 rt->u.dst.lastuse = jiffies;
1293                 rt->rt6i_expires = 0;
1294
1295                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1296                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1297                 rt->rt6i_metric = 0;
1298
1299                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1300 #ifdef CONFIG_IPV6_SUBTREES
1301                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1302 #endif
1303         }
1304         return rt;
1305 }
1306
1307 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1308 {       
1309         struct rt6_info *rt;
1310         struct fib6_node *fn;
1311
1312         fn = &ip6_routing_table;
1313
1314         write_lock_bh(&rt6_lock);
1315         for (rt = fn->leaf; rt; rt=rt->u.next) {
1316                 if (dev == rt->rt6i_dev &&
1317                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1318                         break;
1319         }
1320         if (rt)
1321                 dst_hold(&rt->u.dst);
1322         write_unlock_bh(&rt6_lock);
1323         return rt;
1324 }
1325
1326 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1327                                      struct net_device *dev)
1328 {
1329         struct in6_rtmsg rtmsg;
1330
1331         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1332         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1333         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1334         rtmsg.rtmsg_metric = 1024;
1335         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1336
1337         rtmsg.rtmsg_ifindex = dev->ifindex;
1338
1339         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1340         return rt6_get_dflt_router(gwaddr, dev);
1341 }
1342
1343 void rt6_purge_dflt_routers(void)
1344 {
1345         struct rt6_info *rt;
1346
1347 restart:
1348         read_lock_bh(&rt6_lock);
1349         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1350                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1351                         dst_hold(&rt->u.dst);
1352
1353                         rt6_reset_dflt_pointer(NULL);
1354
1355                         read_unlock_bh(&rt6_lock);
1356
1357                         ip6_del_rt(rt, NULL, NULL, NULL);
1358
1359                         goto restart;
1360                 }
1361         }
1362         read_unlock_bh(&rt6_lock);
1363 }
1364
1365 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1366 {
1367         struct in6_rtmsg rtmsg;
1368         int err;
1369
1370         switch(cmd) {
1371         case SIOCADDRT:         /* Add a route */
1372         case SIOCDELRT:         /* Delete a route */
1373                 if (!capable(CAP_NET_ADMIN))
1374                         return -EPERM;
1375                 err = copy_from_user(&rtmsg, arg,
1376                                      sizeof(struct in6_rtmsg));
1377                 if (err)
1378                         return -EFAULT;
1379                         
1380                 rtnl_lock();
1381                 switch (cmd) {
1382                 case SIOCADDRT:
1383                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1384                         break;
1385                 case SIOCDELRT:
1386                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1387                         break;
1388                 default:
1389                         err = -EINVAL;
1390                 }
1391                 rtnl_unlock();
1392
1393                 return err;
1394         };
1395
1396         return -EINVAL;
1397 }
1398
1399 /*
1400  *      Drop the packet on the floor
1401  */
1402
1403 static int ip6_pkt_discard(struct sk_buff *skb)
1404 {
1405         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1406         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1407         kfree_skb(skb);
1408         return 0;
1409 }
1410
1411 static int ip6_pkt_discard_out(struct sk_buff *skb)
1412 {
1413         skb->dev = skb->dst->dev;
1414         return ip6_pkt_discard(skb);
1415 }
1416
1417 /*
1418  *      Allocate a dst for local (unicast / anycast) address.
1419  */
1420
1421 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1422                                     const struct in6_addr *addr,
1423                                     int anycast)
1424 {
1425         struct rt6_info *rt = ip6_dst_alloc();
1426
1427         if (rt == NULL)
1428                 return ERR_PTR(-ENOMEM);
1429
1430         dev_hold(&loopback_dev);
1431         in6_dev_hold(idev);
1432
1433         rt->u.dst.flags = DST_HOST;
1434         rt->u.dst.input = ip6_input;
1435         rt->u.dst.output = ip6_output;
1436         rt->rt6i_dev = &loopback_dev;
1437         rt->rt6i_idev = idev;
1438         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1439         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1440         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1441         rt->u.dst.obsolete = -1;
1442
1443         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1444         if (anycast)
1445                 rt->rt6i_flags |= RTF_ANYCAST;
1446         else
1447                 rt->rt6i_flags |= RTF_LOCAL;
1448         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1449         if (rt->rt6i_nexthop == NULL) {
1450                 dst_free((struct dst_entry *) rt);
1451                 return ERR_PTR(-ENOMEM);
1452         }
1453
1454         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1455         rt->rt6i_dst.plen = 128;
1456
1457         atomic_set(&rt->u.dst.__refcnt, 1);
1458
1459         return rt;
1460 }
1461
1462 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1463 {
1464         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1465             rt != &ip6_null_entry) {
1466                 RT6_TRACE("deleted by ifdown %p\n", rt);
1467                 return -1;
1468         }
1469         return 0;
1470 }
1471
1472 void rt6_ifdown(struct net_device *dev)
1473 {
1474         write_lock_bh(&rt6_lock);
1475         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1476         write_unlock_bh(&rt6_lock);
1477 }
1478
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned mtu;                   /* its new MTU */
};
1484
1485 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1486 {
1487         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1488         struct inet6_dev *idev;
1489
1490         /* In IPv6 pmtu discovery is not optional,
1491            so that RTAX_MTU lock cannot disable it.
1492            We still use this lock to block changes
1493            caused by addrconf/ndisc.
1494         */
1495
1496         idev = __in6_dev_get(arg->dev);
1497         if (idev == NULL)
1498                 return 0;
1499
1500         /* For administrative MTU increase, there is no way to discover
1501            IPv6 PMTU increase, so PMTU increase should be updated here.
1502            Since RFC 1981 doesn't include administrative MTU increase
1503            update PMTU increase is a MUST. (i.e. jumbo frame)
1504          */
1505         /*
1506            If new MTU is less than route PMTU, this new MTU will be the
1507            lowest MTU in the path, update the route PMTU to reflect PMTU
1508            decreases; if new MTU is greater than route PMTU, and the
1509            old MTU is the lowest MTU in the path, update the route PMTU
1510            to reflect the increase. In this case if the other nodes' MTU
1511            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1512            PMTU discouvery.
1513          */
1514         if (rt->rt6i_dev == arg->dev &&
1515             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1516             (dst_mtu(&rt->u.dst) > arg->mtu ||
1517              (dst_mtu(&rt->u.dst) < arg->mtu &&
1518               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1519                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1520         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1521         return 0;
1522 }
1523
1524 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1525 {
1526         struct rt6_mtu_change_arg arg;
1527
1528         arg.dev = dev;
1529         arg.mtu = mtu;
1530         read_lock_bh(&rt6_lock);
1531         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1532         read_unlock_bh(&rt6_lock);
1533 }
1534
1535 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1536                               struct in6_rtmsg *rtmsg)
1537 {
1538         memset(rtmsg, 0, sizeof(*rtmsg));
1539
1540         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1541         rtmsg->rtmsg_src_len = r->rtm_src_len;
1542         rtmsg->rtmsg_flags = RTF_UP;
1543         if (r->rtm_type == RTN_UNREACHABLE)
1544                 rtmsg->rtmsg_flags |= RTF_REJECT;
1545
1546         if (rta[RTA_GATEWAY-1]) {
1547                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1548                         return -EINVAL;
1549                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1550                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1551         }
1552         if (rta[RTA_DST-1]) {
1553                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1554                         return -EINVAL;
1555                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1556         }
1557         if (rta[RTA_SRC-1]) {
1558                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1559                         return -EINVAL;
1560                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1561         }
1562         if (rta[RTA_OIF-1]) {
1563                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1564                         return -EINVAL;
1565                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1566         }
1567         if (rta[RTA_PRIORITY-1]) {
1568                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1569                         return -EINVAL;
1570                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1571         }
1572         return 0;
1573 }
1574
1575 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1576 {
1577         struct rtmsg *r = NLMSG_DATA(nlh);
1578         struct in6_rtmsg rtmsg;
1579
1580         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1581                 return -EINVAL;
1582         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1583 }
1584
1585 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1586 {
1587         struct rtmsg *r = NLMSG_DATA(nlh);
1588         struct in6_rtmsg rtmsg;
1589
1590         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1591                 return -EINVAL;
1592         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1593 }
1594
/* Per-call state for a FIB dump, reached by rt6_dump_route() via w->args. */
struct rt6_rtnl_dump_arg {
        struct sk_buff *skb;            /* buffer the routes are written to */
        struct netlink_callback *cb;    /* dump callback state of the request */
};
1600
1601 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1602                          struct in6_addr *dst, struct in6_addr *src,
1603                          int iif, int type, u32 pid, u32 seq,
1604                          int prefix, unsigned int flags)
1605 {
1606         struct rtmsg *rtm;
1607         struct nlmsghdr  *nlh;
1608         unsigned char    *b = skb->tail;
1609         struct rta_cacheinfo ci;
1610
1611         if (prefix) {   /* user wants prefix routes only */
1612                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1613                         /* success since this is not a prefix route */
1614                         return 1;
1615                 }
1616         }
1617
1618         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1619         rtm = NLMSG_DATA(nlh);
1620         rtm->rtm_family = AF_INET6;
1621         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1622         rtm->rtm_src_len = rt->rt6i_src.plen;
1623         rtm->rtm_tos = 0;
1624         rtm->rtm_table = RT_TABLE_MAIN;
1625         if (rt->rt6i_flags&RTF_REJECT)
1626                 rtm->rtm_type = RTN_UNREACHABLE;
1627         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1628                 rtm->rtm_type = RTN_LOCAL;
1629         else
1630                 rtm->rtm_type = RTN_UNICAST;
1631         rtm->rtm_flags = 0;
1632         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1633         rtm->rtm_protocol = rt->rt6i_protocol;
1634         if (rt->rt6i_flags&RTF_DYNAMIC)
1635                 rtm->rtm_protocol = RTPROT_REDIRECT;
1636         else if (rt->rt6i_flags & RTF_ADDRCONF)
1637                 rtm->rtm_protocol = RTPROT_KERNEL;
1638         else if (rt->rt6i_flags&RTF_DEFAULT)
1639                 rtm->rtm_protocol = RTPROT_RA;
1640
1641         if (rt->rt6i_flags&RTF_CACHE)
1642                 rtm->rtm_flags |= RTM_F_CLONED;
1643
1644         if (dst) {
1645                 RTA_PUT(skb, RTA_DST, 16, dst);
1646                 rtm->rtm_dst_len = 128;
1647         } else if (rtm->rtm_dst_len)
1648                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1649 #ifdef CONFIG_IPV6_SUBTREES
1650         if (src) {
1651                 RTA_PUT(skb, RTA_SRC, 16, src);
1652                 rtm->rtm_src_len = 128;
1653         } else if (rtm->rtm_src_len)
1654                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1655 #endif
1656         if (iif)
1657                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1658         else if (dst) {
1659                 struct in6_addr saddr_buf;
1660                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1661                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1662         }
1663         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1664                 goto rtattr_failure;
1665         if (rt->u.dst.neighbour)
1666                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1667         if (rt->u.dst.dev)
1668                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1669         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1670         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1671         if (rt->rt6i_expires)
1672                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1673         else
1674                 ci.rta_expires = 0;
1675         ci.rta_used = rt->u.dst.__use;
1676         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1677         ci.rta_error = rt->u.dst.error;
1678         ci.rta_id = 0;
1679         ci.rta_ts = 0;
1680         ci.rta_tsage = 0;
1681         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1682         nlh->nlmsg_len = skb->tail - b;
1683         return skb->len;
1684
1685 nlmsg_failure:
1686 rtattr_failure:
1687         skb_trim(skb, b - skb->data);
1688         return -1;
1689 }
1690
1691 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1692 {
1693         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1694         int prefix;
1695
1696         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1697                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1698                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1699         } else
1700                 prefix = 0;
1701
1702         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1703                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1704                      prefix, NLM_F_MULTI);
1705 }
1706
1707 static int fib6_dump_node(struct fib6_walker_t *w)
1708 {
1709         int res;
1710         struct rt6_info *rt;
1711
1712         for (rt = w->leaf; rt; rt = rt->u.next) {
1713                 res = rt6_dump_route(rt, w->args);
1714                 if (res < 0) {
1715                         /* Frame is full, suspend walking */
1716                         w->leaf = rt;
1717                         return 1;
1718                 }
1719                 BUG_TRAP(res!=0);
1720         }
1721         w->leaf = NULL;
1722         return 0;
1723 }
1724
1725 static void fib6_dump_end(struct netlink_callback *cb)
1726 {
1727         struct fib6_walker_t *w = (void*)cb->args[0];
1728
1729         if (w) {
1730                 cb->args[0] = 0;
1731                 fib6_walker_unlink(w);
1732                 kfree(w);
1733         }
1734         cb->done = (void*)cb->args[1];
1735         cb->args[1] = 0;
1736 }
1737
1738 static int fib6_dump_done(struct netlink_callback *cb)
1739 {
1740         fib6_dump_end(cb);
1741         return cb->done ? cb->done(cb) : 0;
1742 }
1743
1744 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1745 {
1746         struct rt6_rtnl_dump_arg arg;
1747         struct fib6_walker_t *w;
1748         int res;
1749
1750         arg.skb = skb;
1751         arg.cb = cb;
1752
1753         w = (void*)cb->args[0];
1754         if (w == NULL) {
1755                 /* New dump:
1756                  * 
1757                  * 1. hook callback destructor.
1758                  */
1759                 cb->args[1] = (long)cb->done;
1760                 cb->done = fib6_dump_done;
1761
1762                 /*
1763                  * 2. allocate and initialize walker.
1764                  */
1765                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1766                 if (w == NULL)
1767                         return -ENOMEM;
1768                 RT6_TRACE("dump<%p", w);
1769                 memset(w, 0, sizeof(*w));
1770                 w->root = &ip6_routing_table;
1771                 w->func = fib6_dump_node;
1772                 w->args = &arg;
1773                 cb->args[0] = (long)w;
1774                 read_lock_bh(&rt6_lock);
1775                 res = fib6_walk(w);
1776                 read_unlock_bh(&rt6_lock);
1777         } else {
1778                 w->args = &arg;
1779                 read_lock_bh(&rt6_lock);
1780                 res = fib6_walk_continue(w);
1781                 read_unlock_bh(&rt6_lock);
1782         }
1783 #if RT6_DEBUG >= 3
1784         if (res <= 0 && skb->len == 0)
1785                 RT6_TRACE("%p>dump end\n", w);
1786 #endif
1787         res = res < 0 ? res : skb->len;
1788         /* res < 0 is an error. (really, impossible)
1789            res == 0 means that dump is complete, but skb still can contain data.
1790            res > 0 dump is not complete, but frame is full.
1791          */
1792         /* Destroy walker, if dump of this table is complete. */
1793         if (res <= 0)
1794                 fib6_dump_end(cb);
1795         return res;
1796 }
1797
1798 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1799 {
1800         struct rtattr **rta = arg;
1801         int iif = 0;
1802         int err = -ENOBUFS;
1803         struct sk_buff *skb;
1804         struct flowi fl;
1805         struct rt6_info *rt;
1806
1807         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1808         if (skb == NULL)
1809                 goto out;
1810
1811         /* Reserve room for dummy headers, this skb can pass
1812            through good chunk of routing engine.
1813          */
1814         skb->mac.raw = skb->data;
1815         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1816
1817         memset(&fl, 0, sizeof(fl));
1818         if (rta[RTA_SRC-1])
1819                 ipv6_addr_copy(&fl.fl6_src,
1820                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1821         if (rta[RTA_DST-1])
1822                 ipv6_addr_copy(&fl.fl6_dst,
1823                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1824
1825         if (rta[RTA_IIF-1])
1826                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1827
1828         if (iif) {
1829                 struct net_device *dev;
1830                 dev = __dev_get_by_index(iif);
1831                 if (!dev) {
1832                         err = -ENODEV;
1833                         goto out_free;
1834                 }
1835         }
1836
1837         fl.oif = 0;
1838         if (rta[RTA_OIF-1])
1839                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1840
1841         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1842
1843         skb->dst = &rt->u.dst;
1844
1845         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1846         err = rt6_fill_node(skb, rt, 
1847                             &fl.fl6_dst, &fl.fl6_src,
1848                             iif,
1849                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1850                             nlh->nlmsg_seq, 0, 0);
1851         if (err < 0) {
1852                 err = -EMSGSIZE;
1853                 goto out_free;
1854         }
1855
1856         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1857         if (err > 0)
1858                 err = 0;
1859 out:
1860         return err;
1861 out_free:
1862         kfree_skb(skb);
1863         goto out;       
1864 }
1865
1866 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1867                         struct netlink_skb_parms *req)
1868 {
1869         struct sk_buff *skb;
1870         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1871         u32 pid = current->pid;
1872         u32 seq = 0;
1873
1874         if (req)
1875                 pid = req->pid;
1876         if (nlh)
1877                 seq = nlh->nlmsg_seq;
1878         
1879         skb = alloc_skb(size, gfp_any());
1880         if (!skb) {
1881                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1882                 return;
1883         }
1884         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1885                 kfree_skb(skb);
1886                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1887                 return;
1888         }
1889         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1890         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1891 }
1892
1893 /*
1894  *      /proc
1895  */
1896
1897 #ifdef CONFIG_PROC_FS
1898
/*
 *	Size in bytes that one text record written by rt6_info_route() is
 *	assumed to occupy; the read offset is converted to a record count
 *	by dividing by this value.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 *	State shared between rt6_proc_info() and the per-route callback
 *	rt6_info_route().
 */
struct rt6_proc_arg {
	char *buffer;	/* destination buffer for the formatted records */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach 'offset' */
	int len;	/* bytes written into 'buffer' so far */
};
1909
1910 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1911 {
1912         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1913         int i;
1914
1915         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1916                 arg->skip++;
1917                 return 0;
1918         }
1919
1920         if (arg->len >= arg->length)
1921                 return 0;
1922
1923         for (i=0; i<16; i++) {
1924                 sprintf(arg->buffer + arg->len, "%02x",
1925                         rt->rt6i_dst.addr.s6_addr[i]);
1926                 arg->len += 2;
1927         }
1928         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1929                             rt->rt6i_dst.plen);
1930
1931 #ifdef CONFIG_IPV6_SUBTREES
1932         for (i=0; i<16; i++) {
1933                 sprintf(arg->buffer + arg->len, "%02x",
1934                         rt->rt6i_src.addr.s6_addr[i]);
1935                 arg->len += 2;
1936         }
1937         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1938                             rt->rt6i_src.plen);
1939 #else
1940         sprintf(arg->buffer + arg->len,
1941                 "00000000000000000000000000000000 00 ");
1942         arg->len += 36;
1943 #endif
1944
1945         if (rt->rt6i_nexthop) {
1946                 for (i=0; i<16; i++) {
1947                         sprintf(arg->buffer + arg->len, "%02x",
1948                                 rt->rt6i_nexthop->primary_key[i]);
1949                         arg->len += 2;
1950                 }
1951         } else {
1952                 sprintf(arg->buffer + arg->len,
1953                         "00000000000000000000000000000000");
1954                 arg->len += 32;
1955         }
1956         arg->len += sprintf(arg->buffer + arg->len,
1957                             " %08x %08x %08x %08x %8s\n",
1958                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1959                             rt->u.dst.__use, rt->rt6i_flags, 
1960                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1961         return 0;
1962 }
1963
1964 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1965 {
1966         struct rt6_proc_arg arg;
1967         arg.buffer = buffer;
1968         arg.offset = offset;
1969         arg.length = length;
1970         arg.skip = 0;
1971         arg.len = 0;
1972
1973         read_lock_bh(&rt6_lock);
1974         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1975         read_unlock_bh(&rt6_lock);
1976
1977         *start = buffer;
1978         if (offset)
1979                 *start += offset % RT6_INFO_LEN;
1980
1981         arg.len -= offset % RT6_INFO_LEN;
1982
1983         if (arg.len > length)
1984                 arg.len = length;
1985         if (arg.len < 0)
1986                 arg.len = 0;
1987
1988         return arg.len;
1989 }
1990
1991 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1992 {
1993         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1994                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1995                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1996                       rt6_stats.fib_rt_cache,
1997                       atomic_read(&ip6_dst_ops.entries),
1998                       rt6_stats.fib_discarded_routes);
1999
2000         return 0;
2001 }
2002
2003 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2004 {
2005         return single_open(file, rt6_stats_seq_show, NULL);
2006 }
2007
2008 static struct file_operations rt6_stats_seq_fops = {
2009         .owner   = THIS_MODULE,
2010         .open    = rt6_stats_seq_open,
2011         .read    = seq_read,
2012         .llseek  = seq_lseek,
2013         .release = single_release,
2014 };
2015 #endif  /* CONFIG_PROC_FS */
2016
2017 #ifdef CONFIG_SYSCTL
2018
2019 static int flush_delay;
2020
2021 static
2022 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2023                               void __user *buffer, size_t *lenp, loff_t *ppos)
2024 {
2025         if (write) {
2026                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2027                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2028                 return 0;
2029         } else
2030                 return -EINVAL;
2031 }
2032
2033 ctl_table ipv6_route_table[] = {
2034         {
2035                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2036                 .procname       =       "flush",
2037                 .data           =       &flush_delay,
2038                 .maxlen         =       sizeof(int),
2039                 .mode           =       0200,
2040                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2041         },
2042         {
2043                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2044                 .procname       =       "gc_thresh",
2045                 .data           =       &ip6_dst_ops.gc_thresh,
2046                 .maxlen         =       sizeof(int),
2047                 .mode           =       0644,
2048                 .proc_handler   =       &proc_dointvec,
2049         },
2050         {
2051                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2052                 .procname       =       "max_size",
2053                 .data           =       &ip6_rt_max_size,
2054                 .maxlen         =       sizeof(int),
2055                 .mode           =       0644,
2056                 .proc_handler   =       &proc_dointvec,
2057         },
2058         {
2059                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2060                 .procname       =       "gc_min_interval",
2061                 .data           =       &ip6_rt_gc_min_interval,
2062                 .maxlen         =       sizeof(int),
2063                 .mode           =       0644,
2064                 .proc_handler   =       &proc_dointvec_jiffies,
2065                 .strategy       =       &sysctl_jiffies,
2066         },
2067         {
2068                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2069                 .procname       =       "gc_timeout",
2070                 .data           =       &ip6_rt_gc_timeout,
2071                 .maxlen         =       sizeof(int),
2072                 .mode           =       0644,
2073                 .proc_handler   =       &proc_dointvec_jiffies,
2074                 .strategy       =       &sysctl_jiffies,
2075         },
2076         {
2077                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2078                 .procname       =       "gc_interval",
2079                 .data           =       &ip6_rt_gc_interval,
2080                 .maxlen         =       sizeof(int),
2081                 .mode           =       0644,
2082                 .proc_handler   =       &proc_dointvec_jiffies,
2083                 .strategy       =       &sysctl_jiffies,
2084         },
2085         {
2086                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2087                 .procname       =       "gc_elasticity",
2088                 .data           =       &ip6_rt_gc_elasticity,
2089                 .maxlen         =       sizeof(int),
2090                 .mode           =       0644,
2091                 .proc_handler   =       &proc_dointvec_jiffies,
2092                 .strategy       =       &sysctl_jiffies,
2093         },
2094         {
2095                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2096                 .procname       =       "mtu_expires",
2097                 .data           =       &ip6_rt_mtu_expires,
2098                 .maxlen         =       sizeof(int),
2099                 .mode           =       0644,
2100                 .proc_handler   =       &proc_dointvec_jiffies,
2101                 .strategy       =       &sysctl_jiffies,
2102         },
2103         {
2104                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2105                 .procname       =       "min_adv_mss",
2106                 .data           =       &ip6_rt_min_advmss,
2107                 .maxlen         =       sizeof(int),
2108                 .mode           =       0644,
2109                 .proc_handler   =       &proc_dointvec_jiffies,
2110                 .strategy       =       &sysctl_jiffies,
2111         },
2112         {
2113                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2114                 .procname       =       "gc_min_interval_ms",
2115                 .data           =       &ip6_rt_gc_min_interval,
2116                 .maxlen         =       sizeof(int),
2117                 .mode           =       0644,
2118                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2119                 .strategy       =       &sysctl_ms_jiffies,
2120         },
2121         { .ctl_name = 0 }
2122 };
2123
2124 #endif
2125
2126 void __init ip6_route_init(void)
2127 {
2128         struct proc_dir_entry *p;
2129
2130         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2131                                                      sizeof(struct rt6_info),
2132                                                      0, SLAB_HWCACHE_ALIGN,
2133                                                      NULL, NULL);
2134         if (!ip6_dst_ops.kmem_cachep)
2135                 panic("cannot create ip6_dst_cache");
2136
2137         fib6_init();
2138 #ifdef  CONFIG_PROC_FS
2139         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2140         if (p)
2141                 p->owner = THIS_MODULE;
2142
2143         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2144 #endif
2145 #ifdef CONFIG_XFRM
2146         xfrm6_init();
2147 #endif
2148 }
2149
2150 void ip6_route_cleanup(void)
2151 {
2152 #ifdef CONFIG_PROC_FS
2153         proc_net_remove("ipv6_route");
2154         proc_net_remove("rt6_stats");
2155 #endif
2156 #ifdef CONFIG_XFRM
2157         xfrm6_fini();
2158 #endif
2159         rt6_ifdown(NULL);
2160         fib6_gc_cleanup();
2161         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2162 }