err.no Git - linux-2.6/blob - net/ipv6/route.c
20691285aee513035b657943fa972be7657cc592
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59
60 #include <asm/uaccess.h>
61
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() expand to printk
 * calls only when RT6_DEBUG is 3 or higher; at the level set below they
 * expand to nothing.
 */
66 /* Set to 3 to get tracing. */
67 #define RT6_DEBUG 2
68
69 #if RT6_DEBUG >= 3
70 #define RDBG(x) printk x
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
76
/*
 * Compile-time switch used by the policy-route lookups below: when non-zero
 * they clone a route that already has a next hop via rt6_alloc_clone();
 * when zero they return that route unchanged.
 */
77 #define CLONE_OFFLINK_ROUTE 0
78
/*
 * Tunables for the IPv6 route cache.  Times are expressed in jiffies (HZ).
 * ip6_rt_max_size, ip6_rt_gc_min_interval, ip6_rt_gc_timeout and
 * ip6_rt_gc_elasticity are consumed by ip6_dst_gc(); ip6_rt_min_advmss is
 * the lower bound applied by ipv6_advmss().
 * NOTE(review): ip6_rt_gc_interval (non-static) and ip6_rt_mtu_expires are
 * not referenced in this part of the file - presumably used further down or
 * by other files; confirm.
 */
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
/*
 * Forward declarations.  The ip6_dst_* / ip6_negative_advice /
 * ip6_link_failure / ip6_rt_update_pmtu functions are installed as callbacks
 * in ip6_dst_ops below; ip6_pkt_discard{,_out} are installed as the
 * input/output handlers of the static reject entries.
 */
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void             ip6_dst_destroy(struct dst_entry *);
91 static void             ip6_dst_ifdown(struct dst_entry *,
92                                        struct net_device *dev, int how);
93 static int               ip6_dst_gc(void);
94
95 static int              ip6_pkt_discard(struct sk_buff *skb);
96 static int              ip6_pkt_discard_out(struct sk_buff *skb);
97 static void             ip6_link_failure(struct sk_buff *skb);
98 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
/* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102                                            struct in6_addr *gwaddr, int ifindex,
103                                            unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105                                            struct in6_addr *gwaddr, int ifindex);
106 #endif
107
/*
 * dst_ops table for IPv6 routes.  It wires the generic dst callbacks
 * (gc, check, destroy, ifdown, negative_advice, link_failure, update_pmtu)
 * to the ip6_* handlers in this file and sets the per-entry allocation size
 * to sizeof(struct rt6_info).  ip6_dst_alloc() allocates against this table.
 */
108 static struct dst_ops ip6_dst_ops = {
109         .family                 =       AF_INET6,
110         .protocol               =       __constant_htons(ETH_P_IPV6),
111         .gc                     =       ip6_dst_gc,
112         .gc_thresh              =       1024,
113         .check                  =       ip6_dst_check,
114         .destroy                =       ip6_dst_destroy,
115         .ifdown                 =       ip6_dst_ifdown,
116         .negative_advice        =       ip6_negative_advice,
117         .link_failure           =       ip6_link_failure,
118         .update_pmtu            =       ip6_rt_update_pmtu,
119         .entry_size             =       sizeof(struct rt6_info),
120 };
121
/*
 * Statically allocated sentinel route.  rt6_device_match() and rt6_select()
 * return its address when no usable route is found, and the lookup code
 * compares against it to detect that case.  Its dst reports -ENETUNREACH,
 * is bound to the loopback device, and sends both input and output through
 * ip6_pkt_discard() / ip6_pkt_discard_out().  It starts with one reference
 * and .path points back at itself.
 */
122 struct rt6_info ip6_null_entry = {
123         .u = {
124                 .dst = {
125                         .__refcnt       = ATOMIC_INIT(1),
126                         .__use          = 1,
127                         .dev            = &loopback_dev,
128                         .obsolete       = -1,
129                         .error          = -ENETUNREACH,
130                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
131                         .input          = ip6_pkt_discard,
132                         .output         = ip6_pkt_discard_out,
133                         .ops            = &ip6_dst_ops,
134                         .path           = (struct dst_entry*)&ip6_null_entry,
135                 }
136         },
137         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
138         .rt6i_metric    = ~(u32) 0,
139         .rt6i_ref       = ATOMIC_INIT(1),
140 };
141
142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
/*
 * Static reject route built only with CONFIG_IPV6_MULTIPLE_TABLES.
 * Identical in layout to ip6_null_entry except that its dst error is
 * -EACCES and .path points at itself.
 * NOTE(review): presumably the target of "prohibit" policy rules; the user
 * is not visible in this part of the file - confirm.
 */
144 struct rt6_info ip6_prohibit_entry = {
145         .u = {
146                 .dst = {
147                         .__refcnt       = ATOMIC_INIT(1),
148                         .__use          = 1,
149                         .dev            = &loopback_dev,
150                         .obsolete       = -1,
151                         .error          = -EACCES,
152                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
153                         .input          = ip6_pkt_discard,
154                         .output         = ip6_pkt_discard_out,
155                         .ops            = &ip6_dst_ops,
156                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
157                 }
158         },
159         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
160         .rt6i_metric    = ~(u32) 0,
161         .rt6i_ref       = ATOMIC_INIT(1),
162 };
163
/*
 * Static reject route built only with CONFIG_IPV6_MULTIPLE_TABLES.
 * Identical in layout to ip6_null_entry except that its dst error is
 * -EINVAL and .path points at itself.
 * NOTE(review): presumably the target of "blackhole" policy rules; the user
 * is not visible in this part of the file - confirm.
 */
164 struct rt6_info ip6_blk_hole_entry = {
165         .u = {
166                 .dst = {
167                         .__refcnt       = ATOMIC_INIT(1),
168                         .__use          = 1,
169                         .dev            = &loopback_dev,
170                         .obsolete       = -1,
171                         .error          = -EINVAL,
172                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
173                         .input          = ip6_pkt_discard,
174                         .output         = ip6_pkt_discard_out,
175                         .ops            = &ip6_dst_ops,
176                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
177                 }
178         },
179         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
180         .rt6i_metric    = ~(u32) 0,
181         .rt6i_ref       = ATOMIC_INIT(1),
182 };
183
184 #endif
185
186 /* allocate dst with ip6_dst_ops */
187 static __inline__ struct rt6_info *ip6_dst_alloc(void)
188 {
189         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190 }
191
192 static void ip6_dst_destroy(struct dst_entry *dst)
193 {
194         struct rt6_info *rt = (struct rt6_info *)dst;
195         struct inet6_dev *idev = rt->rt6i_idev;
196
197         if (idev != NULL) {
198                 rt->rt6i_idev = NULL;
199                 in6_dev_put(idev);
200         }       
201 }
202
203 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204                            int how)
205 {
206         struct rt6_info *rt = (struct rt6_info *)dst;
207         struct inet6_dev *idev = rt->rt6i_idev;
208
209         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211                 if (loopback_idev != NULL) {
212                         rt->rt6i_idev = loopback_idev;
213                         in6_dev_put(idev);
214                 }
215         }
216 }
217
218 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219 {
220         return (rt->rt6i_flags & RTF_EXPIRES &&
221                 time_after(jiffies, rt->rt6i_expires));
222 }
223
224 static inline int rt6_need_strict(struct in6_addr *daddr)
225 {
226         return (ipv6_addr_type(daddr) &
227                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228 }
229
230 /*
231  *      Route lookup. Any table->tb6_lock is implied.
232  */
233
234 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235                                                     int oif,
236                                                     int strict)
237 {
238         struct rt6_info *local = NULL;
239         struct rt6_info *sprt;
240
241         if (oif) {
242                 for (sprt = rt; sprt; sprt = sprt->u.next) {
243                         struct net_device *dev = sprt->rt6i_dev;
244                         if (dev->ifindex == oif)
245                                 return sprt;
246                         if (dev->flags & IFF_LOOPBACK) {
247                                 if (sprt->rt6i_idev == NULL ||
248                                     sprt->rt6i_idev->dev->ifindex != oif) {
249                                         if (strict && oif)
250                                                 continue;
251                                         if (local && (!oif || 
252                                                       local->rt6i_idev->dev->ifindex == oif))
253                                                 continue;
254                                 }
255                                 local = sprt;
256                         }
257                 }
258
259                 if (local)
260                         return local;
261
262                 if (strict)
263                         return &ip6_null_entry;
264         }
265         return rt;
266 }
267
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the solicited-node
 * multicast address of the next hop.  The interval check is what
 * rate-limits the probes.  A NULL @rt, or a route without a next hop, is
 * ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr solicited_node;
	struct in6_addr *target;
	int probe_due;

	neigh = rt ? rt->rt6i_nexthop : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state and stamp the probe time under the lock. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &solicited_node);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &solicited_node, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
303
304 /*
305  * Default Router Selection (RFC 2461 6.3.6)
306  */
307 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
308 {
309         struct net_device *dev = rt->rt6i_dev;
310         if (!oif || dev->ifindex == oif)
311                 return 2;
312         if ((dev->flags & IFF_LOOPBACK) &&
313             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314                 return 1;
315         return 0;
316 }
317
318 static int inline rt6_check_neigh(struct rt6_info *rt)
319 {
320         struct neighbour *neigh = rt->rt6i_nexthop;
321         int m = 0;
322         if (rt->rt6i_flags & RTF_NONEXTHOP ||
323             !(rt->rt6i_flags & RTF_GATEWAY))
324                 m = 1;
325         else if (neigh) {
326                 read_lock_bh(&neigh->lock);
327                 if (neigh->nud_state & NUD_VALID)
328                         m = 2;
329                 read_unlock_bh(&neigh->lock);
330         }
331         return m;
332 }
333
334 static int rt6_score_route(struct rt6_info *rt, int oif,
335                            int strict)
336 {
337         int m, n;
338                 
339         m = rt6_check_dev(rt, oif);
340         if (!m && (strict & RT6_LOOKUP_F_IFACE))
341                 return -1;
342 #ifdef CONFIG_IPV6_ROUTER_PREF
343         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344 #endif
345         n = rt6_check_neigh(rt);
346         if (n > 1)
347                 m |= 16;
348         else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
349                 return -1;
350         return m;
351 }
352
/*
 * Choose the best route among the entries at the front of the list *@head
 * that share the first entry's metric.
 *
 * Expired entries are skipped.  Every other entry is scored with
 * rt6_score_route(); the highest score wins and earlier candidates keep
 * priority on ties (the comparison is strictly greater-than).  Candidates
 * that lose are handed to rt6_probe().
 *
 * If nothing matched, RT6_LOOKUP_F_REACHABLE was requested and more than
 * one non-expired entry was seen, the first entry is moved behind the last
 * one visited so that a later call starts from a different entry.
 *
 * Returns the selected route, or &ip6_null_entry when there is none.
 *
 * NOTE(review): *head is dereferenced unconditionally (rt0->rt6i_metric)
 * even though the trace call guards against a NULL head - callers
 * presumably guarantee a non-NULL leaf; confirm.
 */
353 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354                                    int strict)
355 {
356         struct rt6_info *match = NULL, *last = NULL;
357         struct rt6_info *rt, *rt0 = *head;
358         u32 metric;
359         int mpri = -1;
360
361         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362                   __FUNCTION__, head, head ? *head : NULL, oif);
363
        /* Walk only the leading run of entries with the same metric as rt0. */
364         for (rt = rt0, metric = rt0->rt6i_metric;
365              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
366              rt = rt->u.next) {
367                 int m;
368
369                 if (rt6_check_expired(rt))
370                         continue;
371
                /* 'last' tracks the last non-expired entry visited. */
372                 last = rt;
373
374                 m = rt6_score_route(rt, oif, strict);
375                 if (m < 0)
376                         continue;
377
378                 if (m > mpri) {
                        /* Probe the candidate being displaced (may be NULL). */
379                         rt6_probe(match);
380                         match = rt;
381                         mpri = m;
382                 } else {
383                         rt6_probe(rt);
384                 }
385         }
386
387         if (!match &&
388             (strict & RT6_LOOKUP_F_REACHABLE) &&
389             last && last != rt0) {
390                 /* no entries matched; do round-robin */
391                 static DEFINE_SPINLOCK(lock);
392                 spin_lock(&lock);
                /* Unlink rt0 from the front and re-insert it right after 'last'. */
393                 *head = rt0->u.next;
394                 rt0->u.next = last->u.next;
395                 last->u.next = rt0;
396                 spin_unlock(&lock);
397         }
398
399         RT6_TRACE("%s() => %p, score=%d\n",
400                   __FUNCTION__, match, mpri);
401
402         return (match ? match : &ip6_null_entry);
403 }
404
405 #ifdef CONFIG_IPV6_ROUTE_INFO
406 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
407                   struct in6_addr *gwaddr)
408 {
409         struct route_info *rinfo = (struct route_info *) opt;
410         struct in6_addr prefix_buf, *prefix;
411         unsigned int pref;
412         u32 lifetime;
413         struct rt6_info *rt;
414
415         if (len < sizeof(struct route_info)) {
416                 return -EINVAL;
417         }
418
419         /* Sanity check for prefix_len and length */
420         if (rinfo->length > 3) {
421                 return -EINVAL;
422         } else if (rinfo->prefix_len > 128) {
423                 return -EINVAL;
424         } else if (rinfo->prefix_len > 64) {
425                 if (rinfo->length < 2) {
426                         return -EINVAL;
427                 }
428         } else if (rinfo->prefix_len > 0) {
429                 if (rinfo->length < 1) {
430                         return -EINVAL;
431                 }
432         }
433
434         pref = rinfo->route_pref;
435         if (pref == ICMPV6_ROUTER_PREF_INVALID)
436                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
437
438         lifetime = htonl(rinfo->lifetime);
439         if (lifetime == 0xffffffff) {
440                 /* infinity */
441         } else if (lifetime > 0x7fffffff/HZ) {
442                 /* Avoid arithmetic overflow */
443                 lifetime = 0x7fffffff/HZ - 1;
444         }
445
446         if (rinfo->length == 3)
447                 prefix = (struct in6_addr *)rinfo->prefix;
448         else {
449                 /* this function is safe */
450                 ipv6_addr_prefix(&prefix_buf,
451                                  (struct in6_addr *)rinfo->prefix,
452                                  rinfo->prefix_len);
453                 prefix = &prefix_buf;
454         }
455
456         rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
457
458         if (rt && !lifetime) {
459                 ip6_del_rt(rt);
460                 rt = NULL;
461         }
462
463         if (!rt && lifetime)
464                 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
465                                         pref);
466         else if (rt)
467                 rt->rt6i_flags = RTF_ROUTEINFO |
468                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
469
470         if (rt) {
471                 if (lifetime == 0xffffffff) {
472                         rt->rt6i_flags &= ~RTF_EXPIRES;
473                 } else {
474                         rt->rt6i_expires = jiffies + HZ * lifetime;
475                         rt->rt6i_flags |= RTF_EXPIRES;
476                 }
477                 dst_release(&rt->u.dst);
478         }
479         return 0;
480 }
481 #endif
482
/*
 * BACKTRACK(saddr) - retry a failed lookup further up the FIB tree.
 *
 * Expands inside a lookup function that provides the local variables
 * 'rt' and 'fn' and the labels 'restart' and 'out'.  It does nothing unless
 * rt == &ip6_null_entry.  Otherwise it climbs from 'fn' towards the root:
 *  - at a node flagged RTN_TL_ROOT it jumps to 'out' (rt is still the
 *    null entry);
 *  - if the parent has a subtree that is not the current node, the subtree
 *    is searched with @saddr, otherwise the parent itself becomes 'fn';
 *  - as soon as the new 'fn' is flagged RTN_RTINFO it jumps to 'restart'.
 */
483 #define BACKTRACK(saddr) \
484 do { \
485         if (rt == &ip6_null_entry) { \
486                 struct fib6_node *pn; \
487                 while (fn) { \
488                         if (fn->fn_flags & RTN_TL_ROOT) \
489                                 goto out; \
490                         pn = fn->parent; \
491                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
492                                 fn = fib6_lookup(pn->subtree, NULL, saddr); \
493                         else \
494                                 fn = pn; \
495                         if (fn->fn_flags & RTN_RTINFO) \
496                                 goto restart; \
497                 } \
498         } \
499 } while(0)
500
501 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502                                              struct flowi *fl, int flags)
503 {
504         struct fib6_node *fn;
505         struct rt6_info *rt;
506
507         read_lock_bh(&table->tb6_lock);
508         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509 restart:
510         rt = fn->leaf;
511         rt = rt6_device_match(rt, fl->oif, flags);
512         BACKTRACK(&fl->fl6_src);
513         dst_hold(&rt->u.dst);
514 out:
515         read_unlock_bh(&table->tb6_lock);
516
517         rt->u.dst.lastuse = jiffies;
518         rt->u.dst.__use++;
519
520         return rt;
521
522 }
523
524 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525                             int oif, int strict)
526 {
527         struct flowi fl = {
528                 .oif = oif,
529                 .nl_u = {
530                         .ip6_u = {
531                                 .daddr = *daddr,
532                                 /* TODO: saddr */
533                         },
534                 },
535         };
536         struct dst_entry *dst;
537         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
538
539         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540         if (dst->error == 0)
541                 return (struct rt6_info *) dst;
542
543         dst_release(dst);
544
545         return NULL;
546 }
547
548 /* ip6_ins_rt is called with table->tb6_lock NOT held.
549    It takes a new route entry; if the addition fails for any reason, the
550    route is freed. In any case, if the caller does not hold a reference to
551    it, it may be destroyed.
552  */
553
554 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
555 {
556         int err;
557         struct fib6_table *table;
558
559         table = rt->rt6i_table;
560         write_lock_bh(&table->tb6_lock);
561         err = fib6_add(&table->tb6_root, rt, info);
562         write_unlock_bh(&table->tb6_lock);
563
564         return err;
565 }
566
567 int ip6_ins_rt(struct rt6_info *rt)
568 {
569         return __ip6_ins_rt(rt, NULL);
570 }
571
572 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573                                       struct in6_addr *saddr)
574 {
575         struct rt6_info *rt;
576
577         /*
578          *      Clone the route.
579          */
580
581         rt = ip6_rt_copy(ort);
582
583         if (rt) {
584                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585                         if (rt->rt6i_dst.plen != 128 &&
586                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587                                 rt->rt6i_flags |= RTF_ANYCAST;
588                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
589                 }
590
591                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
592                 rt->rt6i_dst.plen = 128;
593                 rt->rt6i_flags |= RTF_CACHE;
594                 rt->u.dst.flags |= DST_HOST;
595
596 #ifdef CONFIG_IPV6_SUBTREES
597                 if (rt->rt6i_src.plen && saddr) {
598                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599                         rt->rt6i_src.plen = 128;
600                 }
601 #endif
602
603                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
604
605         }
606
607         return rt;
608 }
609
610 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
611 {
612         struct rt6_info *rt = ip6_rt_copy(ort);
613         if (rt) {
614                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615                 rt->rt6i_dst.plen = 128;
616                 rt->rt6i_flags |= RTF_CACHE;
617                 if (rt->rt6i_flags & RTF_REJECT)
618                         rt->u.dst.error = ort->u.dst.error;
619                 rt->u.dst.flags |= DST_HOST;
620                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
621         }
622         return rt;
623 }
624
/*
 * Per-table lookup callback for received packets (see ip6_route_input()).
 *
 * Flow of control:
 *  1. Under the table read lock, find the FIB node and select a route with
 *     rt6_select(), keyed on fl->iif.  The first pass additionally demands
 *     RT6_LOOKUP_F_REACHABLE.
 *  2. If the result is &ip6_null_entry or already a cache entry, jump to
 *     'out': the first time this drops the reachability requirement and
 *     redoes the lookup; the second time the result is held and returned.
 *  3. Otherwise hold the route, drop the lock, and - for routes with no
 *     next hop that are not RTF_NONEXTHOP - create a host copy with
 *     rt6_alloc_cow() and insert it.  Other routes are returned as-is
 *     unless CLONE_OFFLINK_ROUTE is enabled.
 *  4. If the insert fails, release and look up again, at most three
 *     attempts in total.
 *
 * The returned route always has a reference held; lastuse and __use are
 * updated before returning.
 */
625 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626                                             struct flowi *fl, int flags)
627 {
628         struct fib6_node *fn;
629         struct rt6_info *rt, *nrt;
630         int strict = 0;
631         int attempts = 3;
632         int err;
633         int reachable = RT6_LOOKUP_F_REACHABLE;
634
635         strict |= flags & RT6_LOOKUP_F_IFACE;
636
637 relookup:
638         read_lock_bh(&table->tb6_lock);
639
640 restart_2:
641         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
642
643 restart:
644         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
645         BACKTRACK(&fl->fl6_src);
646         if (rt == &ip6_null_entry ||
647             rt->rt6i_flags & RTF_CACHE)
648                 goto out;
649
        /* Pin the route before giving up the table lock. */
650         dst_hold(&rt->u.dst);
651         read_unlock_bh(&table->tb6_lock);
652
653         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
654                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
655         else {
656 #if CLONE_OFFLINK_ROUTE
657                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
658 #else
659                 goto out2;
660 #endif
661         }
662
        /* Swap the reference from the original route to the copy (or to
         * the null entry if the copy could not be allocated). */
663         dst_release(&rt->u.dst);
664         rt = nrt ? : &ip6_null_entry;
665
666         dst_hold(&rt->u.dst);
667         if (nrt) {
668                 err = ip6_ins_rt(nrt);
669                 if (!err)
670                         goto out2;
671         }
672
673         if (--attempts <= 0)
674                 goto out2;
675
676         /*
677          * Race condition! In the gap, when table->tb6_lock was
678          * released someone could insert this route.  Relookup.
679          */
680         dst_release(&rt->u.dst);
681         goto relookup;
682
683 out:
        /* First failure: retry once without the reachability requirement. */
684         if (reachable) {
685                 reachable = 0;
686                 goto restart_2;
687         }
688         dst_hold(&rt->u.dst);
689         read_unlock_bh(&table->tb6_lock);
690 out2:
691         rt->u.dst.lastuse = jiffies;
692         rt->u.dst.__use++;
693
694         return rt;
695 }
696
697 void ip6_route_input(struct sk_buff *skb)
698 {
699         struct ipv6hdr *iph = skb->nh.ipv6h;
700         struct flowi fl = {
701                 .iif = skb->dev->ifindex,
702                 .nl_u = {
703                         .ip6_u = {
704                                 .daddr = iph->daddr,
705                                 .saddr = iph->saddr,
706                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
707                         },
708                 },
709                 .proto = iph->nexthdr,
710         };
711         int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
712
713         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
714 }
715
/*
 * Per-table lookup callback for locally generated packets (see
 * ip6_route_output()).  Same algorithm as ip6_pol_route_input(), but the
 * route selection is keyed on fl->oif instead of fl->iif:
 *
 *  1. Under the table read lock, select a route with rt6_select(), first
 *     requiring RT6_LOOKUP_F_REACHABLE.
 *  2. A null or cached result goes to 'out': the first time the lookup is
 *     redone without the reachability requirement, the second time the
 *     result is held and returned.
 *  3. Otherwise the route is held, the lock dropped, and a host copy is
 *     created with rt6_alloc_cow() and inserted when the route has no next
 *     hop and is not RTF_NONEXTHOP; other routes are returned as-is unless
 *     CLONE_OFFLINK_ROUTE is enabled.
 *  4. A failed insert triggers a fresh lookup, at most three attempts.
 *
 * The returned route always has a reference held.
 */
716 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
717                                              struct flowi *fl, int flags)
718 {
719         struct fib6_node *fn;
720         struct rt6_info *rt, *nrt;
721         int strict = 0;
722         int attempts = 3;
723         int err;
724         int reachable = RT6_LOOKUP_F_REACHABLE;
725
726         strict |= flags & RT6_LOOKUP_F_IFACE;
727
728 relookup:
729         read_lock_bh(&table->tb6_lock);
730
731 restart_2:
732         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
733
734 restart:
735         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
736         BACKTRACK(&fl->fl6_src);
737         if (rt == &ip6_null_entry ||
738             rt->rt6i_flags & RTF_CACHE)
739                 goto out;
740
        /* Pin the route before giving up the table lock. */
741         dst_hold(&rt->u.dst);
742         read_unlock_bh(&table->tb6_lock);
743
744         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
745                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
746         else {
747 #if CLONE_OFFLINK_ROUTE
748                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
749 #else
750                 goto out2;
751 #endif
752         }
753
        /* Swap the reference from the original route to the copy (or to
         * the null entry if the copy could not be allocated). */
754         dst_release(&rt->u.dst);
755         rt = nrt ? : &ip6_null_entry;
756
757         dst_hold(&rt->u.dst);
758         if (nrt) {
759                 err = ip6_ins_rt(nrt);
760                 if (!err)
761                         goto out2;
762         }
763
764         if (--attempts <= 0)
765                 goto out2;
766
767         /*
768          * Race condition! In the gap, when table->tb6_lock was
769          * released someone could insert this route.  Relookup.
770          */
771         dst_release(&rt->u.dst);
772         goto relookup;
773
774 out:
        /* First failure: retry once without the reachability requirement. */
775         if (reachable) {
776                 reachable = 0;
777                 goto restart_2;
778         }
779         dst_hold(&rt->u.dst);
780         read_unlock_bh(&table->tb6_lock);
781 out2:
782         rt->u.dst.lastuse = jiffies;
783         rt->u.dst.__use++;
784         return rt;
785 }
786
787 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
788 {
789         int flags = 0;
790
791         if (rt6_need_strict(&fl->fl6_dst))
792                 flags |= RT6_LOOKUP_F_IFACE;
793
794         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
795 }
796
797
798 /*
799  *      Destination cache support functions
800  */
801
802 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
803 {
804         struct rt6_info *rt;
805
806         rt = (struct rt6_info *) dst;
807
808         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
809                 return dst;
810
811         return NULL;
812 }
813
814 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
815 {
816         struct rt6_info *rt = (struct rt6_info *) dst;
817
818         if (rt) {
819                 if (rt->rt6i_flags & RTF_CACHE)
820                         ip6_del_rt(rt);
821                 else
822                         dst_release(dst);
823         }
824         return NULL;
825 }
826
827 static void ip6_link_failure(struct sk_buff *skb)
828 {
829         struct rt6_info *rt;
830
831         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
832
833         rt = (struct rt6_info *) skb->dst;
834         if (rt) {
835                 if (rt->rt6i_flags&RTF_CACHE) {
836                         dst_set_expires(&rt->u.dst, 0);
837                         rt->rt6i_flags |= RTF_EXPIRES;
838                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839                         rt->rt6i_node->fn_sernum = -1;
840         }
841 }
842
843 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
844 {
845         struct rt6_info *rt6 = (struct rt6_info*)dst;
846
847         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848                 rt6->rt6i_flags |= RTF_MODIFIED;
849                 if (mtu < IPV6_MIN_MTU) {
850                         mtu = IPV6_MIN_MTU;
851                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
852                 }
853                 dst->metrics[RTAX_MTU-1] = mtu;
854                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
855         }
856 }
857
858 static int ipv6_get_mtu(struct net_device *dev);
859
860 static inline unsigned int ipv6_advmss(unsigned int mtu)
861 {
862         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
863
864         if (mtu < ip6_rt_min_advmss)
865                 mtu = ip6_rt_min_advmss;
866
867         /*
868          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
869          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
870          * IPV6_MAXPLEN is also valid and means: "any MSS, 
871          * rely only on pmtu discovery"
872          */
873         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
874                 mtu = IPV6_MAXPLEN;
875         return mtu;
876 }
877
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc().  ndisc_dst_gc() walks it and frees entries whose
 * reference count has dropped to zero.  ndisc_lock protects the list.
 */
878 static struct dst_entry *ndisc_dst_gc_list;
879 static DEFINE_SPINLOCK(ndisc_lock);
880
881 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
882                                   struct neighbour *neigh,
883                                   struct in6_addr *addr,
884                                   int (*output)(struct sk_buff *))
885 {
886         struct rt6_info *rt;
887         struct inet6_dev *idev = in6_dev_get(dev);
888
889         if (unlikely(idev == NULL))
890                 return NULL;
891
892         rt = ip6_dst_alloc();
893         if (unlikely(rt == NULL)) {
894                 in6_dev_put(idev);
895                 goto out;
896         }
897
898         dev_hold(dev);
899         if (neigh)
900                 neigh_hold(neigh);
901         else
902                 neigh = ndisc_get_neigh(dev, addr);
903
904         rt->rt6i_dev      = dev;
905         rt->rt6i_idev     = idev;
906         rt->rt6i_nexthop  = neigh;
907         atomic_set(&rt->u.dst.__refcnt, 1);
908         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911         rt->u.dst.output  = output;
912
913 #if 0   /* there's no chance to use these for ndisc */
914         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
915                                 ? DST_HOST 
916                                 : 0;
917         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918         rt->rt6i_dst.plen = 128;
919 #endif
920
921         spin_lock_bh(&ndisc_lock);
922         rt->u.dst.next = ndisc_dst_gc_list;
923         ndisc_dst_gc_list = &rt->u.dst;
924         spin_unlock_bh(&ndisc_lock);
925
926         fib6_force_start_gc();
927
928 out:
929         return (struct dst_entry *)rt;
930 }
931
932 int ndisc_dst_gc(int *more)
933 {
934         struct dst_entry *dst, *next, **pprev;
935         int freed;
936
937         next = NULL;
938         freed = 0;
939
940         spin_lock_bh(&ndisc_lock);
941         pprev = &ndisc_dst_gc_list;
942
943         while ((dst = *pprev) != NULL) {
944                 if (!atomic_read(&dst->__refcnt)) {
945                         *pprev = dst->next;
946                         dst_free(dst);
947                         freed++;
948                 } else {
949                         pprev = &dst->next;
950                         (*more)++;
951                 }
952         }
953
954         spin_unlock_bh(&ndisc_lock);
955
956         return freed;
957 }
958
959 static int ip6_dst_gc(void)
960 {
961         static unsigned expire = 30*HZ;
962         static unsigned long last_gc;
963         unsigned long now = jiffies;
964
965         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
967                 goto out;
968
969         expire++;
970         fib6_run_gc(expire);
971         last_gc = now;
972         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973                 expire = ip6_rt_gc_timeout>>1;
974
975 out:
976         expire -= expire>>ip6_rt_gc_elasticity;
977         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
978 }
979
980 /* Clean host part of a prefix. Not necessary in radix tree,
981    but results in cleaner routing tables.
982
983    Remove it only when all the things will work!
984  */
985
986 static int ipv6_get_mtu(struct net_device *dev)
987 {
988         int mtu = IPV6_MIN_MTU;
989         struct inet6_dev *idev;
990
991         idev = in6_dev_get(dev);
992         if (idev) {
993                 mtu = idev->cnf.mtu6;
994                 in6_dev_put(idev);
995         }
996         return mtu;
997 }
998
999 int ipv6_get_hoplimit(struct net_device *dev)
1000 {
1001         int hoplimit = ipv6_devconf.hop_limit;
1002         struct inet6_dev *idev;
1003
1004         idev = in6_dev_get(dev);
1005         if (idev) {
1006                 hoplimit = idev->cnf.hop_limit;
1007                 in6_dev_put(idev);
1008         }
1009         return hoplimit;
1010 }
1011
1012 /*
1013  *
1014  */
1015
1016 int ip6_route_add(struct fib6_config *cfg)
1017 {
1018         int err;
1019         struct rt6_info *rt = NULL;
1020         struct net_device *dev = NULL;
1021         struct inet6_dev *idev = NULL;
1022         struct fib6_table *table;
1023         int addr_type;
1024
1025         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1026                 return -EINVAL;
1027 #ifndef CONFIG_IPV6_SUBTREES
1028         if (cfg->fc_src_len)
1029                 return -EINVAL;
1030 #endif
1031         if (cfg->fc_ifindex) {
1032                 err = -ENODEV;
1033                 dev = dev_get_by_index(cfg->fc_ifindex);
1034                 if (!dev)
1035                         goto out;
1036                 idev = in6_dev_get(dev);
1037                 if (!idev)
1038                         goto out;
1039         }
1040
1041         if (cfg->fc_metric == 0)
1042                 cfg->fc_metric = IP6_RT_PRIO_USER;
1043
1044         table = fib6_new_table(cfg->fc_table);
1045         if (table == NULL) {
1046                 err = -ENOBUFS;
1047                 goto out;
1048         }
1049
1050         rt = ip6_dst_alloc();
1051
1052         if (rt == NULL) {
1053                 err = -ENOMEM;
1054                 goto out;
1055         }
1056
1057         rt->u.dst.obsolete = -1;
1058         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1059
1060         if (cfg->fc_protocol == RTPROT_UNSPEC)
1061                 cfg->fc_protocol = RTPROT_BOOT;
1062         rt->rt6i_protocol = cfg->fc_protocol;
1063
1064         addr_type = ipv6_addr_type(&cfg->fc_dst);
1065
1066         if (addr_type & IPV6_ADDR_MULTICAST)
1067                 rt->u.dst.input = ip6_mc_input;
1068         else
1069                 rt->u.dst.input = ip6_forward;
1070
1071         rt->u.dst.output = ip6_output;
1072
1073         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1074         rt->rt6i_dst.plen = cfg->fc_dst_len;
1075         if (rt->rt6i_dst.plen == 128)
1076                rt->u.dst.flags = DST_HOST;
1077
1078 #ifdef CONFIG_IPV6_SUBTREES
1079         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1080         rt->rt6i_src.plen = cfg->fc_src_len;
1081 #endif
1082
1083         rt->rt6i_metric = cfg->fc_metric;
1084
1085         /* We cannot add true routes via loopback here,
1086            they would result in kernel looping; promote them to reject routes
1087          */
1088         if ((cfg->fc_flags & RTF_REJECT) ||
1089             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1090                 /* hold loopback dev/idev if we haven't done so. */
1091                 if (dev != &loopback_dev) {
1092                         if (dev) {
1093                                 dev_put(dev);
1094                                 in6_dev_put(idev);
1095                         }
1096                         dev = &loopback_dev;
1097                         dev_hold(dev);
1098                         idev = in6_dev_get(dev);
1099                         if (!idev) {
1100                                 err = -ENODEV;
1101                                 goto out;
1102                         }
1103                 }
1104                 rt->u.dst.output = ip6_pkt_discard_out;
1105                 rt->u.dst.input = ip6_pkt_discard;
1106                 rt->u.dst.error = -ENETUNREACH;
1107                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1108                 goto install_route;
1109         }
1110
1111         if (cfg->fc_flags & RTF_GATEWAY) {
1112                 struct in6_addr *gw_addr;
1113                 int gwa_type;
1114
1115                 gw_addr = &cfg->fc_gateway;
1116                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1117                 gwa_type = ipv6_addr_type(gw_addr);
1118
1119                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1120                         struct rt6_info *grt;
1121
1122                         /* IPv6 strictly inhibits using not link-local
1123                            addresses as nexthop address.
1124                            Otherwise, router will not able to send redirects.
1125                            It is very good, but in some (rare!) circumstances
1126                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1127                            some exceptions. --ANK
1128                          */
1129                         err = -EINVAL;
1130                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1131                                 goto out;
1132
1133                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1134
1135                         err = -EHOSTUNREACH;
1136                         if (grt == NULL)
1137                                 goto out;
1138                         if (dev) {
1139                                 if (dev != grt->rt6i_dev) {
1140                                         dst_release(&grt->u.dst);
1141                                         goto out;
1142                                 }
1143                         } else {
1144                                 dev = grt->rt6i_dev;
1145                                 idev = grt->rt6i_idev;
1146                                 dev_hold(dev);
1147                                 in6_dev_hold(grt->rt6i_idev);
1148                         }
1149                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1150                                 err = 0;
1151                         dst_release(&grt->u.dst);
1152
1153                         if (err)
1154                                 goto out;
1155                 }
1156                 err = -EINVAL;
1157                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1158                         goto out;
1159         }
1160
1161         err = -ENODEV;
1162         if (dev == NULL)
1163                 goto out;
1164
1165         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1166                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1167                 if (IS_ERR(rt->rt6i_nexthop)) {
1168                         err = PTR_ERR(rt->rt6i_nexthop);
1169                         rt->rt6i_nexthop = NULL;
1170                         goto out;
1171                 }
1172         }
1173
1174         rt->rt6i_flags = cfg->fc_flags;
1175
1176 install_route:
1177         if (cfg->fc_mx) {
1178                 struct nlattr *nla;
1179                 int remaining;
1180
1181                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1182                         int type = nla->nla_type;
1183
1184                         if (type) {
1185                                 if (type > RTAX_MAX) {
1186                                         err = -EINVAL;
1187                                         goto out;
1188                                 }
1189
1190                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1191                         }
1192                 }
1193         }
1194
1195         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1196                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1197         if (!rt->u.dst.metrics[RTAX_MTU-1])
1198                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1199         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1200                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1201         rt->u.dst.dev = dev;
1202         rt->rt6i_idev = idev;
1203         rt->rt6i_table = table;
1204         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1205
1206 out:
1207         if (dev)
1208                 dev_put(dev);
1209         if (idev)
1210                 in6_dev_put(idev);
1211         if (rt)
1212                 dst_free((struct dst_entry *) rt);
1213         return err;
1214 }
1215
1216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1217 {
1218         int err;
1219         struct fib6_table *table;
1220
1221         if (rt == &ip6_null_entry)
1222                 return -ENOENT;
1223
1224         table = rt->rt6i_table;
1225         write_lock_bh(&table->tb6_lock);
1226
1227         err = fib6_del(rt, info);
1228         dst_release(&rt->u.dst);
1229
1230         write_unlock_bh(&table->tb6_lock);
1231
1232         return err;
1233 }
1234
1235 int ip6_del_rt(struct rt6_info *rt)
1236 {
1237         return __ip6_del_rt(rt, NULL);
1238 }
1239
1240 static int ip6_route_del(struct fib6_config *cfg)
1241 {
1242         struct fib6_table *table;
1243         struct fib6_node *fn;
1244         struct rt6_info *rt;
1245         int err = -ESRCH;
1246
1247         table = fib6_get_table(cfg->fc_table);
1248         if (table == NULL)
1249                 return err;
1250
1251         read_lock_bh(&table->tb6_lock);
1252
1253         fn = fib6_locate(&table->tb6_root,
1254                          &cfg->fc_dst, cfg->fc_dst_len,
1255                          &cfg->fc_src, cfg->fc_src_len);
1256         
1257         if (fn) {
1258                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1259                         if (cfg->fc_ifindex &&
1260                             (rt->rt6i_dev == NULL ||
1261                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1262                                 continue;
1263                         if (cfg->fc_flags & RTF_GATEWAY &&
1264                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1265                                 continue;
1266                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1267                                 continue;
1268                         dst_hold(&rt->u.dst);
1269                         read_unlock_bh(&table->tb6_lock);
1270
1271                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1272                 }
1273         }
1274         read_unlock_bh(&table->tb6_lock);
1275
1276         return err;
1277 }
1278
1279 /*
1280  *      Handle redirects
1281  */
1282 struct ip6rd_flowi {
1283         struct flowi fl;
1284         struct in6_addr gateway;
1285 };
1286
1287 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1288                                              struct flowi *fl,
1289                                              int flags)
1290 {
1291         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1292         struct rt6_info *rt;
1293         struct fib6_node *fn;
1294
1295         /*
1296          * Get the "current" route for this destination and
1297          * check if the redirect has come from approriate router.
1298          *
1299          * RFC 2461 specifies that redirects should only be
1300          * accepted if they come from the nexthop to the target.
1301          * Due to the way the routes are chosen, this notion
1302          * is a bit fuzzy and one might need to check all possible
1303          * routes.
1304          */
1305
1306         read_lock_bh(&table->tb6_lock);
1307         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1308 restart:
1309         for (rt = fn->leaf; rt; rt = rt->u.next) {
1310                 /*
1311                  * Current route is on-link; redirect is always invalid.
1312                  *
1313                  * Seems, previous statement is not true. It could
1314                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1315                  * But then router serving it might decide, that we should
1316                  * know truth 8)8) --ANK (980726).
1317                  */
1318                 if (rt6_check_expired(rt))
1319                         continue;
1320                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321                         continue;
1322                 if (fl->oif != rt->rt6i_dev->ifindex)
1323                         continue;
1324                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1325                         continue;
1326                 break;
1327         }
1328
1329         if (!rt)
1330                 rt = &ip6_null_entry;
1331         BACKTRACK(&fl->fl6_src);
1332 out:
1333         dst_hold(&rt->u.dst);
1334
1335         read_unlock_bh(&table->tb6_lock);
1336
1337         return rt;
1338 };
1339
1340 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1341                                            struct in6_addr *src,
1342                                            struct in6_addr *gateway,
1343                                            struct net_device *dev)
1344 {
1345         struct ip6rd_flowi rdfl = {
1346                 .fl = {
1347                         .oif = dev->ifindex,
1348                         .nl_u = {
1349                                 .ip6_u = {
1350                                         .daddr = *dest,
1351                                         .saddr = *src,
1352                                 },
1353                         },
1354                 },
1355                 .gateway = *gateway,
1356         };
1357         int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
1358
1359         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1360 }
1361
1362 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1363                   struct in6_addr *saddr,
1364                   struct neighbour *neigh, u8 *lladdr, int on_link)
1365 {
1366         struct rt6_info *rt, *nrt = NULL;
1367         struct netevent_redirect netevent;
1368
1369         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1370
1371         if (rt == &ip6_null_entry) {
1372                 if (net_ratelimit())
1373                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1374                                "for redirect target\n");
1375                 goto out;
1376         }
1377
1378         /*
1379          *      We have finally decided to accept it.
1380          */
1381
1382         neigh_update(neigh, lladdr, NUD_STALE, 
1383                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1384                      NEIGH_UPDATE_F_OVERRIDE|
1385                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1386                                      NEIGH_UPDATE_F_ISROUTER))
1387                      );
1388
1389         /*
1390          * Redirect received -> path was valid.
1391          * Look, redirects are sent only in response to data packets,
1392          * so that this nexthop apparently is reachable. --ANK
1393          */
1394         dst_confirm(&rt->u.dst);
1395
1396         /* Duplicate redirect: silently ignore. */
1397         if (neigh == rt->u.dst.neighbour)
1398                 goto out;
1399
1400         nrt = ip6_rt_copy(rt);
1401         if (nrt == NULL)
1402                 goto out;
1403
1404         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1405         if (on_link)
1406                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1407
1408         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1409         nrt->rt6i_dst.plen = 128;
1410         nrt->u.dst.flags |= DST_HOST;
1411
1412         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1413         nrt->rt6i_nexthop = neigh_clone(neigh);
1414         /* Reset pmtu, it may be better */
1415         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1416         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1417
1418         if (ip6_ins_rt(nrt))
1419                 goto out;
1420
1421         netevent.old = &rt->u.dst;
1422         netevent.new = &nrt->u.dst;
1423         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1424
1425         if (rt->rt6i_flags&RTF_CACHE) {
1426                 ip6_del_rt(rt);
1427                 return;
1428         }
1429
1430 out:
1431         dst_release(&rt->u.dst);
1432         return;
1433 }
1434
1435 /*
1436  *      Handle ICMP "packet too big" messages
1437  *      i.e. Path MTU discovery
1438  */
1439
1440 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1441                         struct net_device *dev, u32 pmtu)
1442 {
1443         struct rt6_info *rt, *nrt;
1444         int allfrag = 0;
1445
1446         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1447         if (rt == NULL)
1448                 return;
1449
1450         if (pmtu >= dst_mtu(&rt->u.dst))
1451                 goto out;
1452
1453         if (pmtu < IPV6_MIN_MTU) {
1454                 /*
1455                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1456                  * MTU (1280) and a fragment header should always be included
1457                  * after a node receiving Too Big message reporting PMTU is
1458                  * less than the IPv6 Minimum Link MTU.
1459                  */
1460                 pmtu = IPV6_MIN_MTU;
1461                 allfrag = 1;
1462         }
1463
1464         /* New mtu received -> path was valid.
1465            They are sent only in response to data packets,
1466            so that this nexthop apparently is reachable. --ANK
1467          */
1468         dst_confirm(&rt->u.dst);
1469
1470         /* Host route. If it is static, it would be better
1471            not to override it, but add new one, so that
1472            when cache entry will expire old pmtu
1473            would return automatically.
1474          */
1475         if (rt->rt6i_flags & RTF_CACHE) {
1476                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1477                 if (allfrag)
1478                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1479                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1480                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1481                 goto out;
1482         }
1483
1484         /* Network route.
1485            Two cases are possible:
1486            1. It is connected route. Action: COW
1487            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1488          */
1489         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1490                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1491         else
1492                 nrt = rt6_alloc_clone(rt, daddr);
1493
1494         if (nrt) {
1495                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1496                 if (allfrag)
1497                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1498
1499                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1500                  * happened within 5 mins, the recommended timer is 10 mins.
1501                  * Here this route expiration time is set to ip6_rt_mtu_expires
1502                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1503                  * and detecting PMTU increase will be automatically happened.
1504                  */
1505                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1506                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1507
1508                 ip6_ins_rt(nrt);
1509         }
1510 out:
1511         dst_release(&rt->u.dst);
1512 }
1513
1514 /*
1515  *      Misc support functions
1516  */
1517
1518 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1519 {
1520         struct rt6_info *rt = ip6_dst_alloc();
1521
1522         if (rt) {
1523                 rt->u.dst.input = ort->u.dst.input;
1524                 rt->u.dst.output = ort->u.dst.output;
1525
1526                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1527                 rt->u.dst.dev = ort->u.dst.dev;
1528                 if (rt->u.dst.dev)
1529                         dev_hold(rt->u.dst.dev);
1530                 rt->rt6i_idev = ort->rt6i_idev;
1531                 if (rt->rt6i_idev)
1532                         in6_dev_hold(rt->rt6i_idev);
1533                 rt->u.dst.lastuse = jiffies;
1534                 rt->rt6i_expires = 0;
1535
1536                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1537                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1538                 rt->rt6i_metric = 0;
1539
1540                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1541 #ifdef CONFIG_IPV6_SUBTREES
1542                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1543 #endif
1544                 rt->rt6i_table = ort->rt6i_table;
1545         }
1546         return rt;
1547 }
1548
1549 #ifdef CONFIG_IPV6_ROUTE_INFO
1550 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1551                                            struct in6_addr *gwaddr, int ifindex)
1552 {
1553         struct fib6_node *fn;
1554         struct rt6_info *rt = NULL;
1555         struct fib6_table *table;
1556
1557         table = fib6_get_table(RT6_TABLE_INFO);
1558         if (table == NULL)
1559                 return NULL;
1560
1561         write_lock_bh(&table->tb6_lock);
1562         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1563         if (!fn)
1564                 goto out;
1565
1566         for (rt = fn->leaf; rt; rt = rt->u.next) {
1567                 if (rt->rt6i_dev->ifindex != ifindex)
1568                         continue;
1569                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1570                         continue;
1571                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1572                         continue;
1573                 dst_hold(&rt->u.dst);
1574                 break;
1575         }
1576 out:
1577         write_unlock_bh(&table->tb6_lock);
1578         return rt;
1579 }
1580
1581 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1582                                            struct in6_addr *gwaddr, int ifindex,
1583                                            unsigned pref)
1584 {
1585         struct fib6_config cfg = {
1586                 .fc_table       = RT6_TABLE_INFO,
1587                 .fc_metric      = 1024,
1588                 .fc_ifindex     = ifindex,
1589                 .fc_dst_len     = prefixlen,
1590                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1591                                   RTF_UP | RTF_PREF(pref),
1592         };
1593
1594         ipv6_addr_copy(&cfg.fc_dst, prefix);
1595         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1596
1597         /* We should treat it as a default route if prefix length is 0. */
1598         if (!prefixlen)
1599                 cfg.fc_flags |= RTF_DEFAULT;
1600
1601         ip6_route_add(&cfg);
1602
1603         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1604 }
1605 #endif
1606
1607 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1608 {       
1609         struct rt6_info *rt;
1610         struct fib6_table *table;
1611
1612         table = fib6_get_table(RT6_TABLE_DFLT);
1613         if (table == NULL)
1614                 return NULL;
1615
1616         write_lock_bh(&table->tb6_lock);
1617         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1618                 if (dev == rt->rt6i_dev &&
1619                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1620                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1621                         break;
1622         }
1623         if (rt)
1624                 dst_hold(&rt->u.dst);
1625         write_unlock_bh(&table->tb6_lock);
1626         return rt;
1627 }
1628
1629 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1630                                      struct net_device *dev,
1631                                      unsigned int pref)
1632 {
1633         struct fib6_config cfg = {
1634                 .fc_table       = RT6_TABLE_DFLT,
1635                 .fc_metric      = 1024,
1636                 .fc_ifindex     = dev->ifindex,
1637                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1638                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1639         };
1640
1641         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1642
1643         ip6_route_add(&cfg);
1644
1645         return rt6_get_dflt_router(gwaddr, dev);
1646 }
1647
1648 void rt6_purge_dflt_routers(void)
1649 {
1650         struct rt6_info *rt;
1651         struct fib6_table *table;
1652
1653         /* NOTE: Keep consistent with rt6_get_dflt_router */
1654         table = fib6_get_table(RT6_TABLE_DFLT);
1655         if (table == NULL)
1656                 return;
1657
1658 restart:
1659         read_lock_bh(&table->tb6_lock);
1660         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1661                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1662                         dst_hold(&rt->u.dst);
1663                         read_unlock_bh(&table->tb6_lock);
1664                         ip6_del_rt(rt);
1665                         goto restart;
1666                 }
1667         }
1668         read_unlock_bh(&table->tb6_lock);
1669 }
1670
1671 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1672                                  struct fib6_config *cfg)
1673 {
1674         memset(cfg, 0, sizeof(*cfg));
1675
1676         cfg->fc_table = RT6_TABLE_MAIN;
1677         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1678         cfg->fc_metric = rtmsg->rtmsg_metric;
1679         cfg->fc_expires = rtmsg->rtmsg_info;
1680         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1681         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1682         cfg->fc_flags = rtmsg->rtmsg_flags;
1683
1684         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1685         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1686         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1687 }
1688
1689 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1690 {
1691         struct fib6_config cfg;
1692         struct in6_rtmsg rtmsg;
1693         int err;
1694
1695         switch(cmd) {
1696         case SIOCADDRT:         /* Add a route */
1697         case SIOCDELRT:         /* Delete a route */
1698                 if (!capable(CAP_NET_ADMIN))
1699                         return -EPERM;
1700                 err = copy_from_user(&rtmsg, arg,
1701                                      sizeof(struct in6_rtmsg));
1702                 if (err)
1703                         return -EFAULT;
1704
1705                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1706
1707                 rtnl_lock();
1708                 switch (cmd) {
1709                 case SIOCADDRT:
1710                         err = ip6_route_add(&cfg);
1711                         break;
1712                 case SIOCDELRT:
1713                         err = ip6_route_del(&cfg);
1714                         break;
1715                 default:
1716                         err = -EINVAL;
1717                 }
1718                 rtnl_unlock();
1719
1720                 return err;
1721         };
1722
1723         return -EINVAL;
1724 }
1725
1726 /*
1727  *      Drop the packet on the floor
1728  */
1729
1730 static int ip6_pkt_discard(struct sk_buff *skb)
1731 {
1732         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1733         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1734                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1735
1736         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1737         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1738         kfree_skb(skb);
1739         return 0;
1740 }
1741
1742 static int ip6_pkt_discard_out(struct sk_buff *skb)
1743 {
1744         skb->dev = skb->dst->dev;
1745         return ip6_pkt_discard(skb);
1746 }
1747
1748 /*
1749  *      Allocate a dst for local (unicast / anycast) address.
1750  */
1751
1752 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1753                                     const struct in6_addr *addr,
1754                                     int anycast)
1755 {
1756         struct rt6_info *rt = ip6_dst_alloc();
1757
1758         if (rt == NULL)
1759                 return ERR_PTR(-ENOMEM);
1760
1761         dev_hold(&loopback_dev);
1762         in6_dev_hold(idev);
1763
1764         rt->u.dst.flags = DST_HOST;
1765         rt->u.dst.input = ip6_input;
1766         rt->u.dst.output = ip6_output;
1767         rt->rt6i_dev = &loopback_dev;
1768         rt->rt6i_idev = idev;
1769         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1770         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1771         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1772         rt->u.dst.obsolete = -1;
1773
1774         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1775         if (anycast)
1776                 rt->rt6i_flags |= RTF_ANYCAST;
1777         else
1778                 rt->rt6i_flags |= RTF_LOCAL;
1779         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1780         if (rt->rt6i_nexthop == NULL) {
1781                 dst_free((struct dst_entry *) rt);
1782                 return ERR_PTR(-ENOMEM);
1783         }
1784
1785         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1786         rt->rt6i_dst.plen = 128;
1787         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1788
1789         atomic_set(&rt->u.dst.__refcnt, 1);
1790
1791         return rt;
1792 }
1793
1794 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1795 {
1796         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1797             rt != &ip6_null_entry) {
1798                 RT6_TRACE("deleted by ifdown %p\n", rt);
1799                 return -1;
1800         }
1801         return 0;
1802 }
1803
1804 void rt6_ifdown(struct net_device *dev)
1805 {
1806         fib6_clean_all(fib6_ifdown, 0, dev);
1807 }
1808
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
1814
1815 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1816 {
1817         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1818         struct inet6_dev *idev;
1819
1820         /* In IPv6 pmtu discovery is not optional,
1821            so that RTAX_MTU lock cannot disable it.
1822            We still use this lock to block changes
1823            caused by addrconf/ndisc.
1824         */
1825
1826         idev = __in6_dev_get(arg->dev);
1827         if (idev == NULL)
1828                 return 0;
1829
1830         /* For administrative MTU increase, there is no way to discover
1831            IPv6 PMTU increase, so PMTU increase should be updated here.
1832            Since RFC 1981 doesn't include administrative MTU increase
1833            update PMTU increase is a MUST. (i.e. jumbo frame)
1834          */
1835         /*
1836            If new MTU is less than route PMTU, this new MTU will be the
1837            lowest MTU in the path, update the route PMTU to reflect PMTU
1838            decreases; if new MTU is greater than route PMTU, and the
1839            old MTU is the lowest MTU in the path, update the route PMTU
1840            to reflect the increase. In this case if the other nodes' MTU
1841            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1842            PMTU discouvery.
1843          */
1844         if (rt->rt6i_dev == arg->dev &&
1845             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1846             (dst_mtu(&rt->u.dst) > arg->mtu ||
1847              (dst_mtu(&rt->u.dst) < arg->mtu &&
1848               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1849                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1850         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1851         return 0;
1852 }
1853
1854 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1855 {
1856         struct rt6_mtu_change_arg arg = {
1857                 .dev = dev,
1858                 .mtu = mtu,
1859         };
1860
1861         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1862 }
1863
1864 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1865         [RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
1866         [RTA_OIF]               = { .type = NLA_U32 },
1867         [RTA_IIF]               = { .type = NLA_U32 },
1868         [RTA_PRIORITY]          = { .type = NLA_U32 },
1869         [RTA_METRICS]           = { .type = NLA_NESTED },
1870 };
1871
1872 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1873                               struct fib6_config *cfg)
1874 {
1875         struct rtmsg *rtm;
1876         struct nlattr *tb[RTA_MAX+1];
1877         int err;
1878
1879         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1880         if (err < 0)
1881                 goto errout;
1882
1883         err = -EINVAL;
1884         rtm = nlmsg_data(nlh);
1885         memset(cfg, 0, sizeof(*cfg));
1886
1887         cfg->fc_table = rtm->rtm_table;
1888         cfg->fc_dst_len = rtm->rtm_dst_len;
1889         cfg->fc_src_len = rtm->rtm_src_len;
1890         cfg->fc_flags = RTF_UP;
1891         cfg->fc_protocol = rtm->rtm_protocol;
1892
1893         if (rtm->rtm_type == RTN_UNREACHABLE)
1894                 cfg->fc_flags |= RTF_REJECT;
1895
1896         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1897         cfg->fc_nlinfo.nlh = nlh;
1898
1899         if (tb[RTA_GATEWAY]) {
1900                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1901                 cfg->fc_flags |= RTF_GATEWAY;
1902         }
1903
1904         if (tb[RTA_DST]) {
1905                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1906
1907                 if (nla_len(tb[RTA_DST]) < plen)
1908                         goto errout;
1909
1910                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1911         }
1912
1913         if (tb[RTA_SRC]) {
1914                 int plen = (rtm->rtm_src_len + 7) >> 3;
1915
1916                 if (nla_len(tb[RTA_SRC]) < plen)
1917                         goto errout;
1918
1919                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1920         }
1921
1922         if (tb[RTA_OIF])
1923                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1924
1925         if (tb[RTA_PRIORITY])
1926                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1927
1928         if (tb[RTA_METRICS]) {
1929                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1930                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1931         }
1932
1933         if (tb[RTA_TABLE])
1934                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1935
1936         err = 0;
1937 errout:
1938         return err;
1939 }
1940
1941 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1942 {
1943         struct fib6_config cfg;
1944         int err;
1945
1946         err = rtm_to_fib6_config(skb, nlh, &cfg);
1947         if (err < 0)
1948                 return err;
1949
1950         return ip6_route_del(&cfg);
1951 }
1952
1953 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1954 {
1955         struct fib6_config cfg;
1956         int err;
1957
1958         err = rtm_to_fib6_config(skb, nlh, &cfg);
1959         if (err < 0)
1960                 return err;
1961
1962         return ip6_route_add(&cfg);
1963 }
1964
1965 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1966                          struct in6_addr *dst, struct in6_addr *src,
1967                          int iif, int type, u32 pid, u32 seq,
1968                          int prefix, unsigned int flags)
1969 {
1970         struct rtmsg *rtm;
1971         struct nlmsghdr *nlh;
1972         struct rta_cacheinfo ci;
1973         u32 table;
1974
1975         if (prefix) {   /* user wants prefix routes only */
1976                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1977                         /* success since this is not a prefix route */
1978                         return 1;
1979                 }
1980         }
1981
1982         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1983         if (nlh == NULL)
1984                 return -ENOBUFS;
1985
1986         rtm = nlmsg_data(nlh);
1987         rtm->rtm_family = AF_INET6;
1988         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1989         rtm->rtm_src_len = rt->rt6i_src.plen;
1990         rtm->rtm_tos = 0;
1991         if (rt->rt6i_table)
1992                 table = rt->rt6i_table->tb6_id;
1993         else
1994                 table = RT6_TABLE_UNSPEC;
1995         rtm->rtm_table = table;
1996         NLA_PUT_U32(skb, RTA_TABLE, table);
1997         if (rt->rt6i_flags&RTF_REJECT)
1998                 rtm->rtm_type = RTN_UNREACHABLE;
1999         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2000                 rtm->rtm_type = RTN_LOCAL;
2001         else
2002                 rtm->rtm_type = RTN_UNICAST;
2003         rtm->rtm_flags = 0;
2004         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2005         rtm->rtm_protocol = rt->rt6i_protocol;
2006         if (rt->rt6i_flags&RTF_DYNAMIC)
2007                 rtm->rtm_protocol = RTPROT_REDIRECT;
2008         else if (rt->rt6i_flags & RTF_ADDRCONF)
2009                 rtm->rtm_protocol = RTPROT_KERNEL;
2010         else if (rt->rt6i_flags&RTF_DEFAULT)
2011                 rtm->rtm_protocol = RTPROT_RA;
2012
2013         if (rt->rt6i_flags&RTF_CACHE)
2014                 rtm->rtm_flags |= RTM_F_CLONED;
2015
2016         if (dst) {
2017                 NLA_PUT(skb, RTA_DST, 16, dst);
2018                 rtm->rtm_dst_len = 128;
2019         } else if (rtm->rtm_dst_len)
2020                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2021 #ifdef CONFIG_IPV6_SUBTREES
2022         if (src) {
2023                 NLA_PUT(skb, RTA_SRC, 16, src);
2024                 rtm->rtm_src_len = 128;
2025         } else if (rtm->rtm_src_len)
2026                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2027 #endif
2028         if (iif)
2029                 NLA_PUT_U32(skb, RTA_IIF, iif);
2030         else if (dst) {
2031                 struct in6_addr saddr_buf;
2032                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2033                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2034         }
2035
2036         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2037                 goto nla_put_failure;
2038
2039         if (rt->u.dst.neighbour)
2040                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2041
2042         if (rt->u.dst.dev)
2043                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2044
2045         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2046         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2047         if (rt->rt6i_expires)
2048                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2049         else
2050                 ci.rta_expires = 0;
2051         ci.rta_used = rt->u.dst.__use;
2052         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2053         ci.rta_error = rt->u.dst.error;
2054         ci.rta_id = 0;
2055         ci.rta_ts = 0;
2056         ci.rta_tsage = 0;
2057         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2058
2059         return nlmsg_end(skb, nlh);
2060
2061 nla_put_failure:
2062         return nlmsg_cancel(skb, nlh);
2063 }
2064
2065 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2066 {
2067         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2068         int prefix;
2069
2070         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2071                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2072                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2073         } else
2074                 prefix = 0;
2075
2076         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2077                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2078                      prefix, NLM_F_MULTI);
2079 }
2080
2081 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2082 {
2083         struct nlattr *tb[RTA_MAX+1];
2084         struct rt6_info *rt;
2085         struct sk_buff *skb;
2086         struct rtmsg *rtm;
2087         struct flowi fl;
2088         int err, iif = 0;
2089
2090         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2091         if (err < 0)
2092                 goto errout;
2093
2094         err = -EINVAL;
2095         memset(&fl, 0, sizeof(fl));
2096
2097         if (tb[RTA_SRC]) {
2098                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2099                         goto errout;
2100
2101                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2102         }
2103
2104         if (tb[RTA_DST]) {
2105                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2106                         goto errout;
2107
2108                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2109         }
2110
2111         if (tb[RTA_IIF])
2112                 iif = nla_get_u32(tb[RTA_IIF]);
2113
2114         if (tb[RTA_OIF])
2115                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2116
2117         if (iif) {
2118                 struct net_device *dev;
2119                 dev = __dev_get_by_index(iif);
2120                 if (!dev) {
2121                         err = -ENODEV;
2122                         goto errout;
2123                 }
2124         }
2125
2126         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2127         if (skb == NULL) {
2128                 err = -ENOBUFS;
2129                 goto errout;
2130         }
2131
2132         /* Reserve room for dummy headers, this skb can pass
2133            through good chunk of routing engine.
2134          */
2135         skb->mac.raw = skb->data;
2136         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2137
2138         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2139         skb->dst = &rt->u.dst;
2140
2141         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2142                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2143                             nlh->nlmsg_seq, 0, 0);
2144         if (err < 0) {
2145                 kfree_skb(skb);
2146                 goto errout;
2147         }
2148
2149         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2150 errout:
2151         return err;
2152 }
2153
2154 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2155 {
2156         struct sk_buff *skb;
2157         u32 pid = 0, seq = 0;
2158         struct nlmsghdr *nlh = NULL;
2159         int payload = sizeof(struct rtmsg) + 256;
2160         int err = -ENOBUFS;
2161
2162         if (info) {
2163                 pid = info->pid;
2164                 nlh = info->nlh;
2165                 if (nlh)
2166                         seq = nlh->nlmsg_seq;
2167         }
2168
2169         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2170         if (skb == NULL)
2171                 goto errout;
2172
2173         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2174         if (err < 0) {
2175                 kfree_skb(skb);
2176                 goto errout;
2177         }
2178
2179         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2180 errout:
2181         if (err < 0)
2182                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2183 }
2184
2185 /*
2186  *      /proc
2187  */
2188
2189 #ifdef CONFIG_PROC_FS
2190
/*
 * Nominal length in bytes of one /proc/net/ipv6_route record; used to
 * turn a read offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2201
2202 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2203 {
2204         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2205         int i;
2206
2207         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2208                 arg->skip++;
2209                 return 0;
2210         }
2211
2212         if (arg->len >= arg->length)
2213                 return 0;
2214
2215         for (i=0; i<16; i++) {
2216                 sprintf(arg->buffer + arg->len, "%02x",
2217                         rt->rt6i_dst.addr.s6_addr[i]);
2218                 arg->len += 2;
2219         }
2220         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2221                             rt->rt6i_dst.plen);
2222
2223 #ifdef CONFIG_IPV6_SUBTREES
2224         for (i=0; i<16; i++) {
2225                 sprintf(arg->buffer + arg->len, "%02x",
2226                         rt->rt6i_src.addr.s6_addr[i]);
2227                 arg->len += 2;
2228         }
2229         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2230                             rt->rt6i_src.plen);
2231 #else
2232         sprintf(arg->buffer + arg->len,
2233                 "00000000000000000000000000000000 00 ");
2234         arg->len += 36;
2235 #endif
2236
2237         if (rt->rt6i_nexthop) {
2238                 for (i=0; i<16; i++) {
2239                         sprintf(arg->buffer + arg->len, "%02x",
2240                                 rt->rt6i_nexthop->primary_key[i]);
2241                         arg->len += 2;
2242                 }
2243         } else {
2244                 sprintf(arg->buffer + arg->len,
2245                         "00000000000000000000000000000000");
2246                 arg->len += 32;
2247         }
2248         arg->len += sprintf(arg->buffer + arg->len,
2249                             " %08x %08x %08x %08x %8s\n",
2250                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2251                             rt->u.dst.__use, rt->rt6i_flags, 
2252                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2253         return 0;
2254 }
2255
2256 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2257 {
2258         struct rt6_proc_arg arg = {
2259                 .buffer = buffer,
2260                 .offset = offset,
2261                 .length = length,
2262         };
2263
2264         fib6_clean_all(rt6_info_route, 0, &arg);
2265
2266         *start = buffer;
2267         if (offset)
2268                 *start += offset % RT6_INFO_LEN;
2269
2270         arg.len -= offset % RT6_INFO_LEN;
2271
2272         if (arg.len > length)
2273                 arg.len = length;
2274         if (arg.len < 0)
2275                 arg.len = 0;
2276
2277         return arg.len;
2278 }
2279
2280 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2281 {
2282         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2283                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2284                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2285                       rt6_stats.fib_rt_cache,
2286                       atomic_read(&ip6_dst_ops.entries),
2287                       rt6_stats.fib_discarded_routes);
2288
2289         return 0;
2290 }
2291
2292 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2293 {
2294         return single_open(file, rt6_stats_seq_show, NULL);
2295 }
2296
2297 static struct file_operations rt6_stats_seq_fops = {
2298         .owner   = THIS_MODULE,
2299         .open    = rt6_stats_seq_open,
2300         .read    = seq_read,
2301         .llseek  = seq_lseek,
2302         .release = single_release,
2303 };
2304 #endif  /* CONFIG_PROC_FS */
2305
2306 #ifdef CONFIG_SYSCTL
2307
2308 static int flush_delay;
2309
2310 static
2311 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2312                               void __user *buffer, size_t *lenp, loff_t *ppos)
2313 {
2314         if (write) {
2315                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2316                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2317                 return 0;
2318         } else
2319                 return -EINVAL;
2320 }
2321
2322 ctl_table ipv6_route_table[] = {
2323         {
2324                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2325                 .procname       =       "flush",
2326                 .data           =       &flush_delay,
2327                 .maxlen         =       sizeof(int),
2328                 .mode           =       0200,
2329                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2330         },
2331         {
2332                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2333                 .procname       =       "gc_thresh",
2334                 .data           =       &ip6_dst_ops.gc_thresh,
2335                 .maxlen         =       sizeof(int),
2336                 .mode           =       0644,
2337                 .proc_handler   =       &proc_dointvec,
2338         },
2339         {
2340                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2341                 .procname       =       "max_size",
2342                 .data           =       &ip6_rt_max_size,
2343                 .maxlen         =       sizeof(int),
2344                 .mode           =       0644,
2345                 .proc_handler   =       &proc_dointvec,
2346         },
2347         {
2348                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2349                 .procname       =       "gc_min_interval",
2350                 .data           =       &ip6_rt_gc_min_interval,
2351                 .maxlen         =       sizeof(int),
2352                 .mode           =       0644,
2353                 .proc_handler   =       &proc_dointvec_jiffies,
2354                 .strategy       =       &sysctl_jiffies,
2355         },
2356         {
2357                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2358                 .procname       =       "gc_timeout",
2359                 .data           =       &ip6_rt_gc_timeout,
2360                 .maxlen         =       sizeof(int),
2361                 .mode           =       0644,
2362                 .proc_handler   =       &proc_dointvec_jiffies,
2363                 .strategy       =       &sysctl_jiffies,
2364         },
2365         {
2366                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2367                 .procname       =       "gc_interval",
2368                 .data           =       &ip6_rt_gc_interval,
2369                 .maxlen         =       sizeof(int),
2370                 .mode           =       0644,
2371                 .proc_handler   =       &proc_dointvec_jiffies,
2372                 .strategy       =       &sysctl_jiffies,
2373         },
2374         {
2375                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2376                 .procname       =       "gc_elasticity",
2377                 .data           =       &ip6_rt_gc_elasticity,
2378                 .maxlen         =       sizeof(int),
2379                 .mode           =       0644,
2380                 .proc_handler   =       &proc_dointvec_jiffies,
2381                 .strategy       =       &sysctl_jiffies,
2382         },
2383         {
2384                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2385                 .procname       =       "mtu_expires",
2386                 .data           =       &ip6_rt_mtu_expires,
2387                 .maxlen         =       sizeof(int),
2388                 .mode           =       0644,
2389                 .proc_handler   =       &proc_dointvec_jiffies,
2390                 .strategy       =       &sysctl_jiffies,
2391         },
2392         {
2393                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2394                 .procname       =       "min_adv_mss",
2395                 .data           =       &ip6_rt_min_advmss,
2396                 .maxlen         =       sizeof(int),
2397                 .mode           =       0644,
2398                 .proc_handler   =       &proc_dointvec_jiffies,
2399                 .strategy       =       &sysctl_jiffies,
2400         },
2401         {
2402                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2403                 .procname       =       "gc_min_interval_ms",
2404                 .data           =       &ip6_rt_gc_min_interval,
2405                 .maxlen         =       sizeof(int),
2406                 .mode           =       0644,
2407                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2408                 .strategy       =       &sysctl_ms_jiffies,
2409         },
2410         { .ctl_name = 0 }
2411 };
2412
2413 #endif
2414
2415 void __init ip6_route_init(void)
2416 {
2417         struct proc_dir_entry *p;
2418
2419         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2420                                                      sizeof(struct rt6_info),
2421                                                      0, SLAB_HWCACHE_ALIGN,
2422                                                      NULL, NULL);
2423         if (!ip6_dst_ops.kmem_cachep)
2424                 panic("cannot create ip6_dst_cache");
2425
2426         fib6_init();
2427 #ifdef  CONFIG_PROC_FS
2428         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2429         if (p)
2430                 p->owner = THIS_MODULE;
2431
2432         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2433 #endif
2434 #ifdef CONFIG_XFRM
2435         xfrm6_init();
2436 #endif
2437 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2438         fib6_rules_init();
2439 #endif
2440 }
2441
2442 void ip6_route_cleanup(void)
2443 {
2444 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2445         fib6_rules_cleanup();
2446 #endif
2447 #ifdef CONFIG_PROC_FS
2448         proc_net_remove("ipv6_route");
2449         proc_net_remove("rt6_stats");
2450 #endif
2451 #ifdef CONFIG_XFRM
2452         xfrm6_fini();
2453 #endif
2454         rt6_ifdown(NULL);
2455         fib6_gc_cleanup();
2456         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2457 }