2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
54 #include <linux/rtnetlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
/* Debug/trace macros.
 * NOTE(review): two RT6_TRACE definitions appear below; the #if/#else
 * lines that select between them are missing from this listing. */
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/* When 0, off-link routes are not cloned per destination; the
 * CLONE_OFFLINK_ROUTE #if blocks below compile out the clone path. */
75 #define CLONE_OFFLINK_ROUTE 0
/* Strictness flags passed to rt6_select()/rt6_score_route():
 * - F_IFACE: the route must match the requested outgoing interface.
 * - F_REACHABLE: the nexthop neighbour must look reachable (NUD_VALID). */
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
/* Route-cache garbage-collection tunables (times in jiffies). */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
/* Lifetime of a PMTU-learned cache entry (10 minutes); see
 * rt6_pmtu_discovery() and RFC 1981 comment there. */
85 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum link MTU less TCP (20) and IPv6 (40)
 * header sizes. */
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and packet handlers
 * defined later in this file. */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vector for IPv6 routes: ties the generic destination cache
 * to the IPv6-specific callbacks declared above. */
101 static struct dst_ops ip6_dst_ops = {
103 .protocol = __constant_htons(ETH_P_IPV6),
106 .check = ip6_dst_check,
107 .destroy = ip6_dst_destroy,
108 .ifdown = ip6_dst_ifdown,
109 .negative_advice = ip6_negative_advice,
110 .link_failure = ip6_link_failure,
111 .update_pmtu = ip6_rt_update_pmtu,
112 .entry_size = sizeof(struct rt6_info),
/* The permanent "no route" entry: returned by lookups when nothing
 * matches.  It rejects traffic (-ENETUNREACH via ip6_pkt_discard) and
 * is pinned by static refcounts so it is never freed. */
115 struct rt6_info ip6_null_entry = {
118 .__refcnt = ATOMIC_INIT(1),
120 .dev = &loopback_dev,
122 .error = -ENETUNREACH,
123 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
124 .input = ip6_pkt_discard,
125 .output = ip6_pkt_discard_out,
127 .path = (struct dst_entry*)&ip6_null_entry,
/* Worst possible metric so any real route is preferred. */
130 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
131 .rt6i_metric = ~(u32) 0,
132 .rt6i_ref = ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree; its leaf defaults to the null entry. */
135 struct fib6_node ip6_routing_table = {
136 .leaf = &ip6_null_entry,
137 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
140 /* Protects all the ip6 fib */
142 DEFINE_RWLOCK(rt6_lock);
145 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: all rt6_info entries come from the shared dst cache
 * so they are accounted against ip6_dst_ops.entries.  May return NULL. */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
148 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy callback: drop this route's reference on its
 * inet6_dev.  (The in6_dev_put call is not visible in this listing.) */
151 static void ip6_dst_destroy(struct dst_entry *dst)
153 struct rt6_info *rt = (struct rt6_info *)dst;
154 struct inet6_dev *idev = rt->rt6i_idev;
157 rt->rt6i_idev = NULL;
/* dst_ops.ifdown callback: when the route's device goes away, re-point
 * rt6i_idev at the loopback device so the entry stays valid until it
 * is garbage-collected. */
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
165 struct rt6_info *rt = (struct rt6_info *)dst;
166 struct inet6_dev *idev = rt->rt6i_idev;
168 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170 if (loopback_idev != NULL) {
171 rt->rt6i_idev = loopback_idev;
/* Return nonzero if @rt carries RTF_EXPIRES and its expiry time
 * (rt6i_expires, in jiffies) has passed. */
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
179 return (rt->rt6i_flags & RTF_EXPIRES &&
180 time_after(jiffies, rt->rt6i_expires));
184 * Route lookup. Any rt6_lock is implied.
/* Walk the fib6 leaf chain starting at @rt and pick the entry whose
 * device matches @oif.  Loopback routes are tracked separately in
 * @local as a fallback.  Falls back to ip6_null_entry when nothing
 * matches.  NOTE(review): several branch/return lines are missing from
 * this listing; caller must hold rt6_lock (see comment above). */
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
191 struct rt6_info *local = NULL;
192 struct rt6_info *sprt;
195 for (sprt = rt; sprt; sprt = sprt->u.next) {
196 struct net_device *dev = sprt->rt6i_dev;
197 if (dev->ifindex == oif)
199 if (dev->flags & IFF_LOOPBACK) {
200 if (sprt->rt6i_idev == NULL ||
201 sprt->rt6i_idev->dev->ifindex != oif) {
204 if (local && (!oif ||
205 local->rt6i_idev->dev->ifindex == oif))
216 return &ip6_null_entry;
222 * Default Router Selection (RFC 2461 6.3.6)
/* Score the interface match for @rt against @oif: a direct ifindex
 * match (or oif == 0) is acceptable, and a loopback route counts if
 * its idev's device matches @oif.  Return values (not all visible in
 * this listing) feed rt6_score_route(). */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
226 struct net_device *dev = rt->rt6i_dev;
227 if (!oif || dev->ifindex == oif)
229 if ((dev->flags & IFF_LOOPBACK) &&
230 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/* Return whether the route's nexthop neighbour is (probably) reachable,
 * i.e. its NUD state has NUD_VALID set.  Takes the neighbour lock for
 * the state read. */
235 static int inline rt6_check_neigh(struct rt6_info *rt)
237 struct neighbour *neigh = rt->rt6i_nexthop;
240 read_lock_bh(&neigh->lock);
241 if (neigh->nud_state & NUD_VALID)
243 read_unlock_bh(&neigh->lock);
/* Combine interface match (rt6_check_dev) and neighbour reachability
 * (rt6_check_neigh) into a single score for router selection.  A zero
 * device score fails outright under RT6_SELECT_F_IFACE; an unreachable
 * neighbour fails under RT6_SELECT_F_REACHABLE. */
248 static int rt6_score_route(struct rt6_info *rt, int oif,
251 int m = rt6_check_dev(rt, oif);
252 if (!m && (strict & RT6_SELECT_F_IFACE))
254 if (rt6_check_neigh(rt))
256 else if (strict & RT6_SELECT_F_REACHABLE)
/* Select the best route among same-metric siblings at *@head, using
 * rt6_score_route().  Skips expired entries.  When strict reachability
 * was requested and nothing matched, rotates the list (round-robin) so
 * routers are tried in turn, per the default-router-selection rework
 * noted in the file header.  Returns ip6_null_entry when no candidate
 * scores.  NOTE(review): several lines of the scoring loop are missing
 * from this listing. */
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
264 struct rt6_info *match = NULL, *last = NULL;
265 struct rt6_info *rt, *rt0 = *head;
269 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270 __FUNCTION__, head, head ? *head : NULL, oif);
/* Only consider entries sharing rt0's metric (same preference level). */
272 for (rt = rt0, metric = rt0->rt6i_metric;
273 rt && rt->rt6i_metric == metric;
277 if (rt6_check_expired(rt))
282 m = rt6_score_route(rt, oif, strict);
293 (strict & RT6_SELECT_F_REACHABLE) &&
294 last && last != rt0) {
295 /* no entries matched; do round-robin */
297 rt0->u.next = last->u.next;
301 RT6_TRACE("%s() => %p, score=%d\n",
302 __FUNCTION__, match, mpri);
304 return (match ? match : &ip6_null_entry);
/* Public lookup: find the route for @daddr/@saddr restricted to @oif,
 * take a reference on it and return it.  On error routes the reference
 * is dropped again (the NULL-return line is missing from this listing).
 * Takes rt6_lock for the FIB walk. */
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
310 struct fib6_node *fn;
313 read_lock_bh(&rt6_lock);
314 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315 rt = rt6_device_match(fn->leaf, oif, strict);
316 dst_hold(&rt->u.dst);
318 read_unlock_bh(&rt6_lock);
320 rt->u.dst.lastuse = jiffies;
321 if (rt->u.dst.error == 0)
323 dst_release(&rt->u.dst);
327 /* ip6_ins_rt is called with FREE rt6_lock.
328 It takes new route entry, the addition fails by any reason the
329 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into the FIB under the write lock; fib6_add() owns the
 * entry from here (freeing it on failure, per the comment above). */
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334 void *_rtattr, struct netlink_skb_parms *req)
338 write_lock_bh(&rt6_lock);
339 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340 write_unlock_bh(&rt6_lock);
/* Clone @ort into a host (/128) RTF_CACHE route for @daddr ("copy on
 * write" for a connected route).  For non-gateway routes the gateway
 * becomes the destination itself, and an anycast prefix match is
 * flagged RTF_ANYCAST.  A neighbour entry for the nexthop is resolved
 * at the end.  NOTE(review): NULL-check lines for the copy/neighbour
 * results are missing from this listing. */
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346 struct in6_addr *saddr)
354 rt = ip6_rt_copy(ort);
357 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
/* Destination equal to the (non-/128) prefix address => anycast. */
358 if (rt->rt6i_dst.plen != 128 &&
359 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360 rt->rt6i_flags |= RTF_ANYCAST;
361 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
364 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365 rt->rt6i_dst.plen = 128;
366 rt->rt6i_flags |= RTF_CACHE;
367 rt->u.dst.flags |= DST_HOST;
369 #ifdef CONFIG_IPV6_SUBTREES
/* With subtrees, also narrow the source prefix to a host route. */
370 if (rt->rt6i_src.plen && saddr) {
371 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372 rt->rt6i_src.plen = 128;
376 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/* Clone @ort into a host (/128) RTF_CACHE route for @daddr without
 * re-resolving the nexthop: the original's neighbour is shared via
 * neigh_clone().  Used for gatewayed/NONEXTHOP routes (cf.
 * rt6_alloc_cow for connected routes). */
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
385 struct rt6_info *rt = ip6_rt_copy(ort);
387 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388 rt->rt6i_dst.plen = 128;
389 rt->rt6i_flags |= RTF_CACHE;
/* Preserve the reject error code (ip6_rt_copy does not copy it). */
390 if (rt->rt6i_flags & RTF_REJECT)
391 rt->u.dst.error = ort->u.dst.error;
392 rt->u.dst.flags |= DST_HOST;
393 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/* Shared by ip6_route_input/ip6_route_output: if the lookup landed on
 * ip6_null_entry, walk back up the fib6 tree looking for an ancestor
 * node that carries route info (RTN_RTINFO), stopping at the root.
 * NOTE(review): the loop-exit/goto lines are missing from this listing. */
398 #define BACKTRACK() \
399 if (rt == &ip6_null_entry) { \
400 while ((fn = fn->parent) != NULL) { \
401 if (fn->fn_flags & RTN_ROOT) { \
404 if (fn->fn_flags & RTN_RTINFO) \
/* Route an incoming packet: look up skb's destination in the FIB,
 * prefer a reachable router (falling back to a plain select), and for
 * non-cached results create a per-destination cache clone (COW for
 * connected routes; off-link cloning is compiled out unless
 * CLONE_OFFLINK_ROUTE).  The result is attached as skb->dst with a
 * reference held.  NOTE(review): goto/label and retry lines are
 * missing from this listing. */
410 void ip6_route_input(struct sk_buff *skb)
412 struct fib6_node *fn;
413 struct rt6_info *rt, *nrt;
/* Multicast/link-local destinations must stay on the input interface. */
418 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
421 read_lock_bh(&rt6_lock);
423 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424 &skb->nh.ipv6h->saddr);
/* First pass requires a reachable router; retry without it. */
429 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
430 if (rt == &ip6_null_entry)
431 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
433 if ((rt->rt6i_flags & RTF_CACHE))
436 dst_hold(&rt->u.dst);
437 read_unlock_bh(&rt6_lock);
/* Connected route without a nexthop: clone-on-write to a host route. */
439 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
440 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
442 #if CLONE_OFFLINK_ROUTE
443 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
449 dst_release(&rt->u.dst);
450 rt = nrt ? : &ip6_null_entry;
452 dst_hold(&rt->u.dst);
454 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
463 * Race condition! In the gap, when rt6_lock was
464 * released someone could insert this route. Relookup.
466 dst_release(&rt->u.dst);
470 dst_hold(&rt->u.dst);
471 read_unlock_bh(&rt6_lock);
473 rt->u.dst.lastuse = jiffies;
475 skb->dst = (struct dst_entry *) rt;
/* Route an outgoing flow: mirrors ip6_route_input but keys the lookup
 * on the flowi destination/source and fl->oif instead of the skb
 * headers.  Returns the held dst entry.  NOTE(review): goto/label,
 * retry and return lines are missing from this listing. */
479 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
481 struct fib6_node *fn;
482 struct rt6_info *rt, *nrt;
/* Multicast/link-local destinations must stay on the given interface. */
487 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
490 read_lock_bh(&rt6_lock);
492 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
/* First pass requires a reachable router; retry without it. */
495 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
496 if (rt == &ip6_null_entry)
497 rt = rt6_select(&fn->leaf, fl->oif, strict);
499 if ((rt->rt6i_flags & RTF_CACHE))
502 dst_hold(&rt->u.dst);
503 read_unlock_bh(&rt6_lock);
/* Connected route without a nexthop: clone-on-write to a host route. */
505 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
506 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
508 #if CLONE_OFFLINK_ROUTE
509 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
515 dst_release(&rt->u.dst);
516 rt = nrt ? : &ip6_null_entry;
518 dst_hold(&rt->u.dst);
520 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
529 * Race condition! In the gap, when rt6_lock was
530 * released someone could insert this route. Relookup.
532 dst_release(&rt->u.dst);
536 dst_hold(&rt->u.dst);
537 read_unlock_bh(&rt6_lock);
539 rt->u.dst.lastuse = jiffies;
546 * Destination cache support functions
/* dst_ops.check callback: a cached dst is still valid only if its fib6
 * node's serial number matches the cookie taken at lookup time (route
 * table changes bump fn_sernum, invalidating stale dsts). */
549 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
553 rt = (struct rt6_info *) dst;
555 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice callback: on negative feedback, delete the
 * entry from the FIB if it is a cache clone (RTF_CACHE). */
561 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
563 struct rt6_info *rt = (struct rt6_info *) dst;
566 if (rt->rt6i_flags & RTF_CACHE)
567 ip6_del_rt(rt, NULL, NULL, NULL);
/* dst_ops.link_failure callback: report unreachability to the sender
 * via ICMPv6, then expire the route — immediately for cache clones,
 * or by invalidating the fib6 node's serial number for default routes
 * so dependent dsts fail ip6_dst_check(). */
574 static void ip6_link_failure(struct sk_buff *skb)
578 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
580 rt = (struct rt6_info *) skb->dst;
582 if (rt->rt6i_flags&RTF_CACHE) {
583 dst_set_expires(&rt->u.dst, 0);
584 rt->rt6i_flags |= RTF_EXPIRES;
585 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
586 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu callback: lower the cached MTU on a host (/128)
 * route.  MTUs below the IPv6 minimum are handled by keeping the MTU
 * at/above the minimum and setting RTAX_FEATURE_ALLFRAG so a fragment
 * header is always added (RFC 2460 §5 behaviour). */
590 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
592 struct rt6_info *rt6 = (struct rt6_info*)dst;
594 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
595 rt6->rt6i_flags |= RTF_MODIFIED;
596 if (mtu < IPV6_MIN_MTU) {
598 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
600 dst->metrics[RTAX_MTU-1] = mtu;
604 /* Protected by rt6_lock. */
/* Singly-linked list of dsts handed out by ndisc_dst_alloc(), reaped
 * by ndisc_dst_gc() below. */
605 static struct dst_entry *ndisc_dst_gc_list;
606 static int ipv6_get_mtu(struct net_device *dev);
/* Compute the advertised MSS for a path of MTU @mtu: subtract the
 * IPv6 + TCP header sizes, clamp below at ip6_rt_min_advmss and above
 * per the IPV6_MAXPLEN note below. */
608 static inline unsigned int ipv6_advmss(unsigned int mtu)
610 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
612 if (mtu < ip6_rt_min_advmss)
613 mtu = ip6_rt_min_advmss;
616 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
617 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
618 * IPV6_MAXPLEN is also valid and means: "any MSS,
619 * rely only on pmtu discovery"
621 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Allocate a standalone dst for neighbour-discovery traffic to @addr
 * on @dev, using @output as the output handler.  The entry is chained
 * onto ndisc_dst_gc_list (under rt6_lock) rather than the FIB, and
 * fib6 GC is kicked to eventually reap it.  Resolves @neigh itself
 * when the caller passed none.  NOTE(review): error-path lines are
 * missing from this listing. */
626 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
627 struct neighbour *neigh,
628 struct in6_addr *addr,
629 int (*output)(struct sk_buff *))
632 struct inet6_dev *idev = in6_dev_get(dev);
634 if (unlikely(idev == NULL))
637 rt = ip6_dst_alloc();
638 if (unlikely(rt == NULL)) {
647 neigh = ndisc_get_neigh(dev, addr);
650 rt->rt6i_idev = idev;
651 rt->rt6i_nexthop = neigh;
652 atomic_set(&rt->u.dst.__refcnt, 1);
653 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
654 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
655 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
656 rt->u.dst.output = output;
658 #if 0 /* there's no chance to use these for ndisc */
659 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
662 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
663 rt->rt6i_dst.plen = 128;
/* Publish on the ndisc GC list under the FIB lock. */
666 write_lock_bh(&rt6_lock);
667 rt->u.dst.next = ndisc_dst_gc_list;
668 ndisc_dst_gc_list = &rt->u.dst;
669 write_unlock_bh(&rt6_lock);
671 fib6_force_start_gc();
674 return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list.
 * NOTE(review): most of this function's body (unlink/free logic and
 * the use of @more and the return value) is missing from this listing;
 * only the list-walk skeleton is visible. */
677 int ndisc_dst_gc(int *more)
679 struct dst_entry *dst, *next, **pprev;
683 pprev = &ndisc_dst_gc_list;
685 while ((dst = *pprev) != NULL) {
686 if (!atomic_read(&dst->__refcnt)) {
/* dst_ops.gc callback: rate-limited garbage collection of the IPv6 dst
 * cache.  Skips work when run too recently and the cache is small;
 * otherwise shrinks the adaptive 'expire' horizon and reports whether
 * the cache is still over ip6_rt_max_size.  NOTE(review): the
 * fib6_run_gc call and last_gc update lines are missing from this
 * listing. */
699 static int ip6_dst_gc(void)
701 static unsigned expire = 30*HZ;
702 static unsigned long last_gc;
703 unsigned long now = jiffies;
705 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
706 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
712 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
713 expire = ip6_rt_gc_timeout>>1;
716 expire -= expire>>ip6_rt_gc_elasticity;
717 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
720 /* Clean host part of a prefix. Not necessary in radix tree,
721 but results in cleaner routing tables.
723 Remove it only when all the things will work!
/* Return @dev's IPv6 MTU (cnf.mtu6) when it has an inet6_dev, else the
 * IPv6 minimum MTU.  (The in6_dev_put line is not visible here.) */
726 static int ipv6_get_mtu(struct net_device *dev)
728 int mtu = IPV6_MIN_MTU;
729 struct inet6_dev *idev;
731 idev = in6_dev_get(dev);
733 mtu = idev->cnf.mtu6;
/* Return the hop limit configured for @dev, falling back to the global
 * ipv6_devconf default when the device has no inet6_dev. */
739 int ipv6_get_hoplimit(struct net_device *dev)
741 int hoplimit = ipv6_devconf.hop_limit;
742 struct inet6_dev *idev;
744 idev = in6_dev_get(dev);
746 hoplimit = idev->cnf.hop_limit;
/* Add a route described by @rtmsg (ioctl/netlink path) to the FIB.
 * Validates prefix lengths, resolves the device/idev, fills in the
 * rt6_info (dst handlers, prefixes, metrics, gateway/nexthop), then
 * inserts via ip6_ins_rt().  On failure the partially built entry is
 * freed (see the dst_free at the end).  NOTE(review): numerous
 * error-goto and brace lines are missing from this listing. */
756 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
757 void *_rtattr, struct netlink_skb_parms *req)
762 struct rt6_info *rt = NULL;
763 struct net_device *dev = NULL;
764 struct inet6_dev *idev = NULL;
767 rta = (struct rtattr **) _rtattr;
/* Prefix lengths beyond 128 bits are invalid. */
769 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
771 #ifndef CONFIG_IPV6_SUBTREES
/* Source-based routing needs CONFIG_IPV6_SUBTREES. */
772 if (rtmsg->rtmsg_src_len)
775 if (rtmsg->rtmsg_ifindex) {
777 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
780 idev = in6_dev_get(dev);
785 if (rtmsg->rtmsg_metric == 0)
786 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
788 rt = ip6_dst_alloc();
795 rt->u.dst.obsolete = -1;
/* rtmsg_info carries the route lifetime in clock_t units. */
796 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
797 if (nlh && (r = NLMSG_DATA(nlh))) {
798 rt->rt6i_protocol = r->rtm_protocol;
800 rt->rt6i_protocol = RTPROT_BOOT;
803 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
805 if (addr_type & IPV6_ADDR_MULTICAST)
806 rt->u.dst.input = ip6_mc_input;
808 rt->u.dst.input = ip6_forward;
810 rt->u.dst.output = ip6_output;
812 ipv6_addr_prefix(&rt->rt6i_dst.addr,
813 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
814 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
815 if (rt->rt6i_dst.plen == 128)
816 rt->u.dst.flags = DST_HOST;
818 #ifdef CONFIG_IPV6_SUBTREES
819 ipv6_addr_prefix(&rt->rt6i_src.addr,
820 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
821 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
824 rt->rt6i_metric = rtmsg->rtmsg_metric;
826 /* We cannot add true routes via loopback here,
827 they would result in kernel looping; promote them to reject routes
829 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
830 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
831 /* hold loopback dev/idev if we haven't done so. */
832 if (dev != &loopback_dev) {
839 idev = in6_dev_get(dev);
845 rt->u.dst.output = ip6_pkt_discard_out;
846 rt->u.dst.input = ip6_pkt_discard;
847 rt->u.dst.error = -ENETUNREACH;
848 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
852 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
853 struct in6_addr *gw_addr;
856 gw_addr = &rtmsg->rtmsg_gateway;
857 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
858 gwa_type = ipv6_addr_type(gw_addr);
860 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
861 struct rt6_info *grt;
863 /* IPv6 strictly inhibits using not link-local
864 addresses as nexthop address.
865 Otherwise, router will not able to send redirects.
866 It is very good, but in some (rare!) circumstances
867 (SIT, PtP, NBMA NOARP links) it is handy to allow
868 some exceptions. --ANK
871 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* Non-link-local gateway must itself be reachable via a
 * non-gateway route on the same interface. */
874 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
880 if (dev != grt->rt6i_dev) {
881 dst_release(&grt->u.dst);
886 idev = grt->rt6i_idev;
888 in6_dev_hold(grt->rt6i_idev);
890 if (!(grt->rt6i_flags&RTF_GATEWAY))
892 dst_release(&grt->u.dst);
/* A gatewayed route must have a real, non-loopback device. */
898 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
906 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
907 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
908 if (IS_ERR(rt->rt6i_nexthop)) {
909 err = PTR_ERR(rt->rt6i_nexthop);
910 rt->rt6i_nexthop = NULL;
915 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Apply caller-supplied RTA_METRICS attributes, if any. */
918 if (rta && rta[RTA_METRICS-1]) {
919 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
920 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
922 while (RTA_OK(attr, attrlen)) {
923 unsigned flavor = attr->rta_type;
925 if (flavor > RTAX_MAX) {
929 rt->u.dst.metrics[flavor-1] =
930 *(u32 *)RTA_DATA(attr);
932 attr = RTA_NEXT(attr, attrlen);
/* Fill in defaults for any metric still unset. */
936 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
937 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
938 if (!rt->u.dst.metrics[RTAX_MTU-1])
939 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
940 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
943 rt->rt6i_idev = idev;
944 return ip6_ins_rt(rt, nlh, _rtattr, req);
/* Error path: free the half-built route. */
952 dst_free((struct dst_entry *) rt);
/* Delete @rt from the FIB under the write lock and drop the caller's
 * reference on it.  Returns the fib6_del() result. */
956 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
960 write_lock_bh(&rt6_lock);
962 err = fib6_del(rt, nlh, _rtattr, req);
963 dst_release(&rt->u.dst);
965 write_unlock_bh(&rt6_lock);
/* Delete the route matching @rtmsg: locate the fib6 node for the
 * dst/src prefixes, then scan its leaf chain for an entry that matches
 * the requested ifindex, gateway and metric (each filter applies only
 * when the caller set it).  On a match, take a reference and hand the
 * entry to ip6_del_rt().  NOTE(review): the error-return lines are
 * missing from this listing. */
970 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
972 struct fib6_node *fn;
976 read_lock_bh(&rt6_lock);
978 fn = fib6_locate(&ip6_routing_table,
979 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
980 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
983 for (rt = fn->leaf; rt; rt = rt->u.next) {
984 if (rtmsg->rtmsg_ifindex &&
985 (rt->rt6i_dev == NULL ||
986 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
988 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
989 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
991 if (rtmsg->rtmsg_metric &&
992 rtmsg->rtmsg_metric != rt->rt6i_metric)
/* Matched: hold a ref across the lock drop, then delete. */
994 dst_hold(&rt->u.dst);
995 read_unlock_bh(&rt6_lock);
997 return ip6_del_rt(rt, nlh, _rtattr, req);
1000 read_unlock_bh(&rt6_lock);
/* Handle an ICMPv6 redirect for @dest received from @saddr via @neigh:
 * validate that the redirect came from the current nexthop (checking
 * default routers too), update the neighbour cache, then install a
 * host RTF_CACHE route through the new gateway and remove any old
 * cache entry.  NOTE(review): several goto/label and brace lines are
 * missing from this listing. */
1008 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1009 struct neighbour *neigh, u8 *lladdr, int on_link)
1011 struct rt6_info *rt, *nrt;
1013 /* Locate old route to this destination. */
1014 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1019 if (neigh->dev != rt->rt6i_dev)
1023 * Current route is on-link; redirect is always invalid.
1025 * Seems, previous statement is not true. It could
1026 * be node, which looks for us as on-link (f.e. proxy ndisc)
1027 * But then router serving it might decide, that we should
1028 * know truth 8)8) --ANK (980726).
1030 if (!(rt->rt6i_flags&RTF_GATEWAY))
1034 * RFC 2461 specifies that redirects should only be
1035 * accepted if they come from the nexthop to the target.
1036 * Due to the way default routers are chosen, this notion
1037 * is a bit fuzzy and one might need to check all default
1040 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1041 if (rt->rt6i_flags & RTF_DEFAULT) {
1042 struct rt6_info *rt1;
/* Scan the default routers for one whose gateway is @saddr. */
1044 read_lock(&rt6_lock);
1045 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1046 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1047 dst_hold(&rt1->u.dst);
1048 dst_release(&rt->u.dst);
1049 read_unlock(&rt6_lock);
1054 read_unlock(&rt6_lock);
1056 if (net_ratelimit())
1057 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1058 "for redirect target\n");
1065 * We have finally decided to accept it.
/* Record the new link-layer address; the router bit is set unless the
 * target was declared on-link. */
1068 neigh_update(neigh, lladdr, NUD_STALE,
1069 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1070 NEIGH_UPDATE_F_OVERRIDE|
1071 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1072 NEIGH_UPDATE_F_ISROUTER))
1076 * Redirect received -> path was valid.
1077 * Look, redirects are sent only in response to data packets,
1078 * so that this nexthop apparently is reachable. --ANK
1080 dst_confirm(&rt->u.dst);
1082 /* Duplicate redirect: silently ignore. */
1083 if (neigh == rt->u.dst.neighbour)
1086 nrt = ip6_rt_copy(rt);
1090 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1092 nrt->rt6i_flags &= ~RTF_GATEWAY;
1094 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1095 nrt->rt6i_dst.plen = 128;
1096 nrt->u.dst.flags |= DST_HOST;
1098 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1099 nrt->rt6i_nexthop = neigh_clone(neigh);
1100 /* Reset pmtu, it may be better */
1101 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1102 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1104 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
/* Retire the old cache entry now that the new one is installed. */
1107 if (rt->rt6i_flags&RTF_CACHE) {
1108 ip6_del_rt(rt, NULL, NULL, NULL);
1113 dst_release(&rt->u.dst);
1118 * Handle ICMP "packet too big" messages
1119 * i.e. Path MTU discovery
/* Apply a Too Big report of @pmtu for @daddr (from @saddr) on @dev:
 * update an existing cache entry in place, or create a /128 cache
 * route (COW or clone as appropriate) carrying the reduced MTU with a
 * 10-minute expiry per the RFC 1981 note below.  Sub-minimum PMTUs are
 * clamped and marked ALLFRAG.  NOTE(review): several brace/NULL-check
 * lines are missing from this listing. */
1122 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1123 struct net_device *dev, u32 pmtu)
1125 struct rt6_info *rt, *nrt;
1128 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* An MTU no smaller than what we already use is not news. */
1132 if (pmtu >= dst_mtu(&rt->u.dst))
1135 if (pmtu < IPV6_MIN_MTU) {
1137 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1138 * MTU (1280) and a fragment header should always be included
1139 * after a node receiving Too Big message reporting PMTU is
1140 * less than the IPv6 Minimum Link MTU.
1142 pmtu = IPV6_MIN_MTU;
1146 /* New mtu received -> path was valid.
1147 They are sent only in response to data packets,
1148 so that this nexthop apparently is reachable. --ANK
1150 dst_confirm(&rt->u.dst);
1152 /* Host route. If it is static, it would be better
1153 not to override it, but add new one, so that
1154 when cache entry will expire old pmtu
1155 would return automatically.
1157 if (rt->rt6i_flags & RTF_CACHE) {
1158 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1160 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1161 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1162 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1167 Two cases are possible:
1168 1. It is connected route. Action: COW
1169 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1171 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1172 nrt = rt6_alloc_cow(rt, daddr, saddr);
1174 nrt = rt6_alloc_clone(rt, daddr);
1177 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1179 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1181 /* According to RFC 1981, detecting PMTU increase shouldn't be
1182 * happened within 5 mins, the recommended timer is 10 mins.
1183 * Here this route expiration time is set to ip6_rt_mtu_expires
1184 * which is 10 mins. After 10 mins the decreased pmtu is expired
1185 * and detecting PMTU increase will be automatically happened.
1187 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1188 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1190 ip6_ins_rt(nrt, NULL, NULL, NULL);
1193 dst_release(&rt->u.dst);
1197 * Misc support functions
/* Allocate a new rt6_info and copy the routable state of @ort into it:
 * handlers, metrics, device/idev (with references taken), gateway,
 * flags (minus RTF_EXPIRES) and prefixes.  The cache-specific fields
 * (metric, expiry) are reset.  May return NULL on allocation failure
 * (the NULL-check line is not visible here). */
1200 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1202 struct rt6_info *rt = ip6_dst_alloc();
1205 rt->u.dst.input = ort->u.dst.input;
1206 rt->u.dst.output = ort->u.dst.output;
1208 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1209 rt->u.dst.dev = ort->u.dst.dev;
1211 dev_hold(rt->u.dst.dev);
1212 rt->rt6i_idev = ort->rt6i_idev;
1214 in6_dev_hold(rt->rt6i_idev);
1215 rt->u.dst.lastuse = jiffies;
1216 rt->rt6i_expires = 0;
1218 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1219 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1220 rt->rt6i_metric = 0;
1222 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223 #ifdef CONFIG_IPV6_SUBTREES
1224 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the addrconf default-router entry (RTF_ADDRCONF|RTF_DEFAULT)
 * on @dev whose gateway is @addr.  Scans the root node's leaf chain
 * under the write lock and returns the entry with a reference held
 * (NULL handling is in lines not visible here). */
1230 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1232 struct rt6_info *rt;
1233 struct fib6_node *fn;
1235 fn = &ip6_routing_table;
1237 write_lock_bh(&rt6_lock);
1238 for (rt = fn->leaf; rt; rt=rt->u.next) {
1239 if (dev == rt->rt6i_dev &&
1240 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1241 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1245 dst_hold(&rt->u.dst);
1246 write_unlock_bh(&rt6_lock);
/* Install a default route via @gwaddr on @dev (used by router
 * discovery): builds an in6_rtmsg with the addrconf-default flags and
 * metric 1024, adds it, then looks the entry back up so the caller
 * gets a referenced rt6_info. */
1250 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1251 struct net_device *dev)
1253 struct in6_rtmsg rtmsg;
1255 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1256 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1257 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1258 rtmsg.rtmsg_metric = 1024;
1259 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1261 rtmsg.rtmsg_ifindex = dev->ifindex;
1263 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1264 return rt6_get_dflt_router(gwaddr, dev);
/* Remove every default/addrconf router entry from the table.  The read
 * lock is dropped around each ip6_del_rt() call (which takes the write
 * lock), so the scan restarts after each deletion — the restart goto
 * is in lines not visible in this listing. */
1267 void rt6_purge_dflt_routers(void)
1269 struct rt6_info *rt;
1272 read_lock_bh(&rt6_lock);
1273 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1274 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1275 dst_hold(&rt->u.dst);
1277 read_unlock_bh(&rt6_lock);
1279 ip6_del_rt(rt, NULL, NULL, NULL);
1284 read_unlock_bh(&rt6_lock);
/* ioctl entry point for SIOCADDRT/SIOCDELRT: requires CAP_NET_ADMIN,
 * copies the in6_rtmsg from userspace and dispatches to
 * ip6_route_add()/ip6_route_del().  NOTE(review): the switch braces,
 * rtnl locking and return lines are missing from this listing. */
1287 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1289 struct in6_rtmsg rtmsg;
1293 case SIOCADDRT: /* Add a route */
1294 case SIOCDELRT: /* Delete a route */
1295 if (!capable(CAP_NET_ADMIN))
1297 err = copy_from_user(&rtmsg, arg,
1298 sizeof(struct in6_rtmsg));
1305 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1308 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1322 * Drop the packet on the floor
/* Input handler for reject routes: bump the no-route counter and send
 * an ICMPv6 destination-unreachable (no route) back to the sender. */
1325 static int ip6_pkt_discard(struct sk_buff *skb)
1327 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1328 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-side variant: point skb->dev at the dst's device so the ICMP
 * error is attributed correctly, then discard via ip6_pkt_discard(). */
1333 static int ip6_pkt_discard_out(struct sk_buff *skb)
1335 skb->dev = skb->dst->dev;
1336 return ip6_pkt_discard(skb);
1340 * Allocate a dst for local (unicast / anycast) address.
/* Build a host route for a local address @addr on @idev: the route is
 * pinned to the loopback device, delivers via ip6_input, and is
 * flagged RTF_LOCAL or RTF_ANYCAST (the anycast test itself is in a
 * line not visible here).  Returns ERR_PTR(-ENOMEM) on allocation or
 * neighbour failure; the caller is expected to insert the route. */
1343 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1344 const struct in6_addr *addr,
1347 struct rt6_info *rt = ip6_dst_alloc();
1350 return ERR_PTR(-ENOMEM);
1352 dev_hold(&loopback_dev);
1355 rt->u.dst.flags = DST_HOST;
1356 rt->u.dst.input = ip6_input;
1357 rt->u.dst.output = ip6_output;
1358 rt->rt6i_dev = &loopback_dev;
1359 rt->rt6i_idev = idev;
1360 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1361 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1362 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1363 rt->u.dst.obsolete = -1;
1365 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1367 rt->rt6i_flags |= RTF_ANYCAST;
1369 rt->rt6i_flags |= RTF_LOCAL;
1370 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1371 if (rt->rt6i_nexthop == NULL) {
1372 dst_free((struct dst_entry *) rt);
1373 return ERR_PTR(-ENOMEM);
1376 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1377 rt->rt6i_dst.plen = 128;
1379 atomic_set(&rt->u.dst.__refcnt, 1);
/* fib6_clean_tree callback for rt6_ifdown(): select every route on the
 * given device (or all routes when @arg is NULL), excluding the
 * permanent null entry.  (The nonzero "delete" return is in a line not
 * visible here.) */
1384 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1386 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1387 rt != &ip6_null_entry) {
1388 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Device-down handler: purge all routes through @dev by walking the
 * whole tree with fib6_ifdown() under the write lock. */
1394 void rt6_ifdown(struct net_device *dev)
1396 write_lock_bh(&rt6_lock);
1397 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1398 write_unlock_bh(&rt6_lock);
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed (the new mtu field is in a line not visible here). */
1401 struct rt6_mtu_change_arg
1403 struct net_device *dev;
/* fib6_clean_tree callback for rt6_mtu_change(): for each route on the
 * changed device whose RTAX_MTU is not locked, update the route MTU to
 * the new device MTU — both for decreases and for increases where the
 * route's MTU equalled the old device MTU (see the long rationale in
 * the original comments below).  ADVMSS is recomputed to match. */
1407 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1409 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1410 struct inet6_dev *idev;
1412 /* In IPv6 pmtu discovery is not optional,
1413 so that RTAX_MTU lock cannot disable it.
1414 We still use this lock to block changes
1415 caused by addrconf/ndisc.
1418 idev = __in6_dev_get(arg->dev);
1422 /* For administrative MTU increase, there is no way to discover
1423 IPv6 PMTU increase, so PMTU increase should be updated here.
1424 Since RFC 1981 doesn't include administrative MTU increase
1425 update PMTU increase is a MUST. (i.e. jumbo frame)
1428 If new MTU is less than route PMTU, this new MTU will be the
1429 lowest MTU in the path, update the route PMTU to reflect PMTU
1430 decreases; if new MTU is greater than route PMTU, and the
1431 old MTU is the lowest MTU in the path, update the route PMTU
1432 to reflect the increase. In this case if the other nodes' MTU
1433 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1436 if (rt->rt6i_dev == arg->dev &&
1437 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1438 (dst_mtu(&rt->u.dst) > arg->mtu ||
1439 (dst_mtu(&rt->u.dst) < arg->mtu &&
1440 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1441 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1442 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to all affected routes by walking the
 * tree with rt6_mtu_change_route().  (The arg initialization lines are
 * not visible in this listing.) */
1446 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1448 struct rt6_mtu_change_arg arg;
1452 read_lock_bh(&rt6_lock);
1453 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1454 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add()/ip6_route_del().  Each attribute
 * is length-checked before copying; RTN_UNREACHABLE maps to
 * RTF_REJECT.  (The error-return lines for bad lengths are not visible
 * in this listing.) */
1457 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1458 struct in6_rtmsg *rtmsg)
1460 memset(rtmsg, 0, sizeof(*rtmsg));
1462 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1463 rtmsg->rtmsg_src_len = r->rtm_src_len;
1464 rtmsg->rtmsg_flags = RTF_UP;
1465 if (r->rtm_type == RTN_UNREACHABLE)
1466 rtmsg->rtmsg_flags |= RTF_REJECT;
1468 if (rta[RTA_GATEWAY-1]) {
1469 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1471 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1472 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1474 if (rta[RTA_DST-1]) {
1475 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1477 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1479 if (rta[RTA_SRC-1]) {
1480 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1482 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1484 if (rta[RTA_OIF-1]) {
1485 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1487 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1489 if (rta[RTA_PRIORITY-1]) {
1490 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1492 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert the message and dispatch to
 * ip6_route_del().  (The error return on conversion failure is in a
 * line not visible here.) */
1497 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1499 struct rtmsg *r = NLMSG_DATA(nlh);
1500 struct in6_rtmsg rtmsg;
1502 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1504 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* rtnetlink RTM_NEWROUTE handler: convert the message and dispatch to
 * ip6_route_add().  (The error return on conversion failure is in a
 * line not visible here.) */
1507 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1509 struct rtmsg *r = NLMSG_DATA(nlh);
1510 struct in6_rtmsg rtmsg;
1512 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1514 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* State carried through an rtnetlink route dump: the skb being filled
 * and the netlink callback describing the dump position. */
1517 struct rt6_rtnl_dump_arg
1519 struct sk_buff *skb;
1520 struct netlink_callback *cb;
/*
 * rt6_fill_node - serialise one rt6_info into an rtnetlink message on skb.
 * @dst/@src: when non-NULL, report a full /128 address instead of the
 *            route's stored prefix (used for single-route RTM_GETROUTE).
 * @prefix:   when set, emit only RTF_PREFIX_RT routes and silently skip
 *            the rest (RTM_F_PREFIX dumps).
 * Returns the length consumed on success; on overflow the partially
 * written message is trimmed (skb_trim near the end) — the elided lines
 * presumably carry the nlmsg_failure/rtattr_failure labels and returns.
 * NOTE(review): numerous lines (braces, else-branches, labels) are elided
 * from this extract; code tokens are preserved exactly as found.
 */
1523 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1524 struct in6_addr *dst, struct in6_addr *src,
1525 int iif, int type, u32 pid, u32 seq,
1526 int prefix, unsigned int flags)
1529 struct nlmsghdr *nlh;
1530 unsigned char *b = skb->tail;	/* start of this message, for trim/len */
1531 struct rta_cacheinfo ci;
1533 if (prefix) { /* user wants prefix routes only */
1534 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1535 /* success since this is not a prefix route */
/* Build the fixed rtmsg header. */
1540 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1541 rtm = NLMSG_DATA(nlh);
1542 rtm->rtm_family = AF_INET6;
1543 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1544 rtm->rtm_src_len = rt->rt6i_src.plen;
1546 rtm->rtm_table = RT_TABLE_MAIN;
/* Route type: reject > local (loopback device) > plain unicast. */
1547 if (rt->rt6i_flags&RTF_REJECT)
1548 rtm->rtm_type = RTN_UNREACHABLE;
1549 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1550 rtm->rtm_type = RTN_LOCAL;
1552 rtm->rtm_type = RTN_UNICAST;
1554 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Origin protocol, refined by dynamic/addrconf/default flags. */
1555 rtm->rtm_protocol = rt->rt6i_protocol;
1556 if (rt->rt6i_flags&RTF_DYNAMIC)
1557 rtm->rtm_protocol = RTPROT_REDIRECT;
1558 else if (rt->rt6i_flags & RTF_ADDRCONF)
1559 rtm->rtm_protocol = RTPROT_KERNEL;
1560 else if (rt->rt6i_flags&RTF_DEFAULT)
1561 rtm->rtm_protocol = RTPROT_RA;
1563 if (rt->rt6i_flags&RTF_CACHE)
1564 rtm->rtm_flags |= RTM_F_CLONED;
/* Destination: caller-supplied exact address wins over stored prefix. */
1567 RTA_PUT(skb, RTA_DST, 16, dst);
1568 rtm->rtm_dst_len = 128;
1569 } else if (rtm->rtm_dst_len)
1570 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1571 #ifdef CONFIG_IPV6_SUBTREES
1573 RTA_PUT(skb, RTA_SRC, 16, src);
1574 rtm->rtm_src_len = 128;
1575 } else if (rtm->rtm_src_len)
1576 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1579 RTA_PUT(skb, RTA_IIF, 4, &iif);
/* Advertise a preferred source address when one can be selected. */
1581 struct in6_addr saddr_buf;
1582 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1583 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1585 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1586 goto rtattr_failure;
1587 if (rt->u.dst.neighbour)
1588 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1590 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1591 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache bookkeeping: ages reported in clock ticks, not jiffies. */
1592 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1593 if (rt->rt6i_expires)
1594 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1597 ci.rta_used = rt->u.dst.__use;
1598 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1599 ci.rta_error = rt->u.dst.error;
1603 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1604 nlh->nlmsg_len = skb->tail - b;
/* Failure path: roll the skb back to where this message began. */
1609 skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route - fib6 walker callback for route dumps: honour the
 * RTM_F_PREFIX filter from the request header, then emit the route via
 * rt6_fill_node as part of a multipart dump.
 */
1613 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1615 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* Only trust rtm_flags if the request actually carried a full rtmsg. */
1618 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1619 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1620 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1624 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1625 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1626 prefix, NLM_F_MULTI);
/*
 * fib6_dump_node - dump every route hanging off one fib6 leaf node.
 * A negative result from rt6_dump_route means the skb is full; the elided
 * lines presumably record the resume point and return 1 to suspend the
 * walk (resumed later by fib6_walk_continue).
 */
1629 static int fib6_dump_node(struct fib6_walker_t *w)
1632 struct rt6_info *rt;
1634 for (rt = w->leaf; rt; rt = rt->u.next) {
1635 res = rt6_dump_route(rt, w->args);
1637 /* Frame is full, suspend walking */
/*
 * fib6_dump_end - tear down dump state stashed in the netlink callback:
 * unlink (and, per the elided lines, presumably free) the walker saved in
 * cb->args[0], and restore the original done handler from cb->args[1].
 */
1647 static void fib6_dump_end(struct netlink_callback *cb)
1649 struct fib6_walker_t *w = (void*)cb->args[0];
1653 fib6_walker_unlink(w);
1656 cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done - netlink done hook installed by inet6_dump_fib; after
 * cleanup (fib6_dump_end, presumably on an elided line) it chains to the
 * original done callback if one was saved.
 */
1660 static int fib6_dump_done(struct netlink_callback *cb)
1663 return cb->done ? cb->done(cb) : 0;
/*
 * inet6_dump_fib - rtnetlink dump entry point for the IPv6 routing table.
 * First invocation (cb->args[0] == 0, per the elided branch) hijacks
 * cb->done for cleanup and allocates a fib6 walker rooted at the main
 * table; later invocations resume the suspended walk.  The tree is only
 * read-locked for the duration of each walk segment.
 * NOTE(review): allocation-failure handling, the first-call/resume branch
 * structure and the final teardown are elided from this extract.
 */
1666 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1668 struct rt6_rtnl_dump_arg arg;
1669 struct fib6_walker_t *w;
1675 w = (void*)cb->args[0];
1679 * 1. hook callback destructor.
1681 cb->args[1] = (long)cb->done;
1682 cb->done = fib6_dump_done;
1685 * 2. allocate and initialize walker.
1687 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1690 RT6_TRACE("dump<%p", w);
1691 memset(w, 0, sizeof(*w));
1692 w->root = &ip6_routing_table;
1693 w->func = fib6_dump_node;
1695 cb->args[0] = (long)w;
1696 read_lock_bh(&rt6_lock);
1698 read_unlock_bh(&rt6_lock);
/* Resume path: continue a previously suspended walk. */
1701 read_lock_bh(&rt6_lock);
1702 res = fib6_walk_continue(w);
1703 read_unlock_bh(&rt6_lock);
1706 if (res <= 0 && skb->len == 0)
1707 RT6_TRACE("%p>dump end\n", w);
1709 res = res < 0 ? res : skb->len;
1710 /* res < 0 is an error. (really, impossible)
1711 res == 0 means that dump is complete, but skb still can contain data.
1712 res > 0 dump is not complete, but frame is full.
1714 /* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler for a single route lookup.
 * Builds a flow from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes,
 * resolves it with ip6_route_output(), serialises the result with
 * rt6_fill_node() and unicasts the answer back to the requester.
 * NOTE(review): attribute-presence checks, error returns, the iif
 * handling after __dev_get_by_index and skb cleanup are elided from this
 * extract; code tokens are preserved exactly as found.
 */
1720 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1722 struct rtattr **rta = arg;
1725 struct sk_buff *skb;
1727 struct rt6_info *rt;
1729 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1733 /* Reserve room for dummy headers, this skb can pass
1734 through good chunk of routing engine.
1736 skb->mac.raw = skb->data;
1737 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1739 memset(&fl, 0, sizeof(fl));
1741 ipv6_addr_copy(&fl.fl6_src,
1742 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1744 ipv6_addr_copy(&fl.fl6_dst,
1745 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
/* Input-interface lookup — presumably validated on an elided line. */
1748 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1751 struct net_device *dev;
1752 dev = __dev_get_by_index(iif);
1761 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1763 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the route so rt6_fill_node/kfree_skb manage its refcount. */
1765 skb->dst = &rt->u.dst;
1767 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1768 err = rt6_fill_node(skb, rt,
1769 &fl.fl6_dst, &fl.fl6_src,
1771 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1772 nlh->nlmsg_seq, 0, 0);
1778 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify - broadcast a route add/delete event to the
 * RTNLGRP_IPV6_ROUTE netlink group; allocation or fill failures are
 * reported to listeners via netlink_set_err.
 * NOTE(review): pid is taken from current->pid rather than from @req /
 * @nlh; later kernels derive it from the request — verify whether this
 * vintage intends the same.  Several branches (req/nlh NULL handling,
 * kfree_skb on failure) are elided from this extract.
 */
1788 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1789 struct netlink_skb_parms *req)
1791 struct sk_buff *skb;
1792 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);	/* header + attr slack */
1793 u32 pid = current->pid;
1799 seq = nlh->nlmsg_seq;
1801 skb = alloc_skb(size, gfp_any());
1803 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1806 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1808 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1811 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1812 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1819 #ifdef CONFIG_PROC_FS
/* Fixed width of one /proc/net/ipv6_route line: dst(32)+plen(4)+src(32)+
 * plen(4)+nexthop(32)+counters(40)+dev(5)+newline — used for pagination. */
1821 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - fib6 walker callback formatting one route as a
 * fixed-width hex line for /proc/net/ipv6_route.  Routes before the
 * requested file offset are skipped cheaply via the skip counter; output
 * stops once the caller's buffer length is reached.
 * NOTE(review): the arg->len += 32 adjustments after the hex loops and
 * loop/brace closers are elided from this extract.
 */
1832 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1834 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole lines that lie before the requested offset. */
1837 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1842 if (arg->len >= arg->length)
/* Destination prefix as 32 hex digits + prefix length. */
1845 for (i=0; i<16; i++) {
1846 sprintf(arg->buffer + arg->len, "%02x",
1847 rt->rt6i_dst.addr.s6_addr[i]);
1850 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1853 #ifdef CONFIG_IPV6_SUBTREES
1854 for (i=0; i<16; i++) {
1855 sprintf(arg->buffer + arg->len, "%02x",
1856 rt->rt6i_src.addr.s6_addr[i]);
1859 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Without subtrees the source column is all-zero. */
1862 sprintf(arg->buffer + arg->len,
1863 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour key if resolved, zeros otherwise. */
1867 if (rt->rt6i_nexthop) {
1868 for (i=0; i<16; i++) {
1869 sprintf(arg->buffer + arg->len, "%02x",
1870 rt->rt6i_nexthop->primary_key[i]);
1874 sprintf(arg->buffer + arg->len,
1875 "00000000000000000000000000000000");
1878 arg->len += sprintf(arg->buffer + arg->len,
1879 " %08x %08x %08x %08x %8s\n",
1880 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1881 rt->u.dst.__use, rt->rt6i_flags,
1882 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info - legacy get_info handler for /proc/net/ipv6_route.
 * Walks the routing tree under the read lock, letting rt6_info_route fill
 * the page buffer, then adjusts *start/len for the partial-line offset
 * within the first emitted RT6_INFO_LEN-sized record.
 * NOTE(review): arg field initialisation and the tail of the offset
 * arithmetic are elided from this extract.
 */
1886 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1888 struct rt6_proc_arg arg;
1889 arg.buffer = buffer;
1890 arg.offset = offset;
1891 arg.length = length;
1895 read_lock_bh(&rt6_lock);
1896 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1897 read_unlock_bh(&rt6_lock);
1901 *start += offset % RT6_INFO_LEN;
1903 arg.len -= offset % RT6_INFO_LEN;
1905 if (arg.len > length)
/*
 * rt6_stats_seq_show - emit the single line of /proc/net/rt6_stats:
 * fib node/route counters plus the live dst-entry count.
 */
1913 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1915 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1916 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1917 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1918 rt6_stats.fib_rt_cache,
1919 atomic_read(&ip6_dst_ops.entries),
1920 rt6_stats.fib_discarded_routes);
/* Open handler: single_open, since the stats file is one self-contained
 * record with no iteration state. */
1925 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1927 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats.
 * NOTE(review): the ".read = seq_read," member appears elided from this
 * extract (the single_open/seq_lseek/single_release trio normally pairs
 * with seq_read).
 */
1930 static struct file_operations rt6_stats_seq_fops = {
1931 .owner = THIS_MODULE,
1932 .open = rt6_stats_seq_open,
1934 .llseek = seq_lseek,
1935 .release = single_release,
1937 #endif /* CONFIG_PROC_FS */
1939 #ifdef CONFIG_SYSCTL
/* Sysctl-backed value written via net.ipv6.route.flush; consumed once per
 * write by ipv6_sysctl_rtcache_flush below. */
1941 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush - proc handler for net.ipv6.route.flush:
 * parse the written integer via proc_dointvec, then run the fib6 garbage
 * collector; non-positive delay means flush everything immediately (~0UL).
 * NOTE(review): the write-only guard and return statements appear elided
 * from this extract.
 */
1944 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1945 void __user *buffer, size_t *lenp, loff_t *ppos)
1948 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1949 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*: flush trigger, GC tuning knobs
 * (thresholds, intervals, elasticity), MTU expiry and minimum advertised
 * MSS.  Jiffies-valued entries use the jiffies proc handler/strategy;
 * gc_min_interval is additionally exposed in milliseconds.
 * NOTE(review): entry braces, .mode fields and the terminating entry are
 * elided from this extract; code tokens are preserved exactly as found.
 */
1955 ctl_table ipv6_route_table[] = {
1957 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1958 .procname = "flush",
1959 .data = &flush_delay,
1960 .maxlen = sizeof(int),
1962 .proc_handler = &ipv6_sysctl_rtcache_flush
1965 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1966 .procname = "gc_thresh",
1967 .data = &ip6_dst_ops.gc_thresh,
1968 .maxlen = sizeof(int),
1970 .proc_handler = &proc_dointvec,
1973 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1974 .procname = "max_size",
1975 .data = &ip6_rt_max_size,
1976 .maxlen = sizeof(int),
1978 .proc_handler = &proc_dointvec,
1981 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1982 .procname = "gc_min_interval",
1983 .data = &ip6_rt_gc_min_interval,
1984 .maxlen = sizeof(int),
1986 .proc_handler = &proc_dointvec_jiffies,
1987 .strategy = &sysctl_jiffies,
1990 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1991 .procname = "gc_timeout",
1992 .data = &ip6_rt_gc_timeout,
1993 .maxlen = sizeof(int),
1995 .proc_handler = &proc_dointvec_jiffies,
1996 .strategy = &sysctl_jiffies,
1999 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2000 .procname = "gc_interval",
2001 .data = &ip6_rt_gc_interval,
2002 .maxlen = sizeof(int),
2004 .proc_handler = &proc_dointvec_jiffies,
2005 .strategy = &sysctl_jiffies,
2008 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2009 .procname = "gc_elasticity",
2010 .data = &ip6_rt_gc_elasticity,
2011 .maxlen = sizeof(int),
2013 .proc_handler = &proc_dointvec_jiffies,
2014 .strategy = &sysctl_jiffies,
2017 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2018 .procname = "mtu_expires",
2019 .data = &ip6_rt_mtu_expires,
2020 .maxlen = sizeof(int),
2022 .proc_handler = &proc_dointvec_jiffies,
2023 .strategy = &sysctl_jiffies,
2026 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2027 .procname = "min_adv_mss",
2028 .data = &ip6_rt_min_advmss,
2029 .maxlen = sizeof(int),
2031 .proc_handler = &proc_dointvec_jiffies,
2032 .strategy = &sysctl_jiffies,
/* Millisecond-granularity alias of gc_min_interval (same backing int). */
2035 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2036 .procname = "gc_min_interval_ms",
2037 .data = &ip6_rt_gc_min_interval,
2038 .maxlen = sizeof(int),
2040 .proc_handler = &proc_dointvec_ms_jiffies,
2041 .strategy = &sysctl_ms_jiffies,
/*
 * ip6_route_init - boot-time initialisation of the IPv6 routing layer:
 * create the rt6_info slab cache (panic on failure — routing is not
 * optional) and register the /proc/net entries when procfs is enabled.
 * NOTE(review): kmem_cache_create's trailing arguments and further init
 * calls (e.g. fib6/xfrm setup) appear elided from this extract.
 */
2048 void __init ip6_route_init(void)
2050 struct proc_dir_entry *p;
2052 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053 sizeof(struct rt6_info),
2054 0, SLAB_HWCACHE_ALIGN,
2056 if (!ip6_dst_ops.kmem_cachep)
2057 panic("cannot create ip6_dst_cache");
2060 #ifdef CONFIG_PROC_FS
2061 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2063 p->owner = THIS_MODULE;
2065 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_cleanup - teardown counterpart of ip6_route_init: remove the
 * /proc/net entries and destroy the rt6_info slab cache.
 * NOTE(review): intermediate cleanup calls between the proc removal and
 * kmem_cache_destroy appear elided from this extract.
 */
2072 void ip6_route_cleanup(void)
2074 #ifdef CONFIG_PROC_FS
2075 proc_net_remove("ipv6_route");
2076 proc_net_remove("rt6_stats");
2083 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);