X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv6%2Froute.c;h=e8b241cb60bc434a4240e0d955f0a1d168b39e07;hb=064922a805ec7aadfafdd27aa6b4908d737c3c1d;hp=6ecb5e6fae2eb9feff63496bd0749d02237af156;hpb=95b00786f3b8fa99f53931361beeb4c10504ad87;p=linux-2.6 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ecb5e6fae..a493ad9b89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -36,10 +36,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -73,21 +75,13 @@ #define CLONE_OFFLINK_ROUTE 0 -static int ip6_rt_max_size = 4096; -static int ip6_rt_gc_min_interval = HZ / 2; -static int ip6_rt_gc_timeout = 60*HZ; -int ip6_rt_gc_interval = 30*HZ; -static int ip6_rt_gc_elasticity = 9; -static int ip6_rt_mtu_expires = 10*60*HZ; -static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(void); +static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); @@ -95,14 +89,16 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex); #endif -static struct dst_ops ip6_dst_ops = { +static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = ip6_dst_gc, @@ -113,7 +109,9 @@ static struct dst_ops ip6_dst_ops = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, + .local_out = ip6_local_out, .entry_size = sizeof(struct rt6_info), + .entries = ATOMIC_INIT(0), }; static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -127,9 +125,10 @@ static struct dst_ops ip6_dst_blackhole_ops = { .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, .entry_size = sizeof(struct rt6_info), + .entries = ATOMIC_INIT(0), }; -struct rt6_info ip6_null_entry = { +static struct rt6_info ip6_null_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -139,8 +138,6 @@ struct rt6_info ip6_null_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_null_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -152,9 +149,8 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static int ip6_pkt_blk_hole(struct sk_buff *skb); -struct rt6_info ip6_prohibit_entry = { +static struct rt6_info ip6_prohibit_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -164,8 +160,6 @@ struct rt6_info ip6_prohibit_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_prohibit_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -173,7 +167,7 @@ struct rt6_info ip6_prohibit_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct rt6_info ip6_blk_hole_entry = { +static struct rt6_info ip6_blk_hole_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -181,10 +175,8 @@ struct rt6_info ip6_blk_hole_entry = { .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_blk_hole, - .output = ip6_pkt_blk_hole, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_blk_hole_entry, + .input = dst_discard, + .output = dst_discard, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -195,9 +187,9 @@ struct rt6_info ip6_blk_hole_entry = { #endif /* allocate dst with ip6_dst_ops */ -static __inline__ struct rt6_info *ip6_dst_alloc(void) +static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(&ip6_dst_ops); + return (struct rt6_info *)dst_alloc(ops); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -216,9 +208,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct net_device *loopback_dev = + dev_net(dev)->loopback_dev; - if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); + if (dev != loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -242,7 +237,8 @@ static inline int rt6_need_strict(struct in6_addr *daddr) * Route lookup. Any table->tb6_lock is implied. */ -static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, +static inline struct rt6_info *rt6_device_match(struct net *net, + struct rt6_info *rt, int oif, int strict) { @@ -271,7 +267,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, return local; if (strict) - return &ip6_null_entry; + return net->ipv6.ip6_null_entry; } return rt; } @@ -329,7 +325,7 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh = rt->rt6i_nexthop; - int m = 0; + int m; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -337,10 +333,15 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) m = 2; - else if (!(neigh->nud_state & NUD_FAILED)) +#ifdef CONFIG_IPV6_ROUTER_PREF + else if (neigh->nud_state & NUD_FAILED) + m = 0; +#endif + else m = 1; read_unlock_bh(&neigh->lock); - } + } else + m = 0; return m; } @@ -407,9 +408,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; + struct net *net; RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", - __FUNCTION__, fn->leaf, oif); + __func__, fn->leaf, oif); rt0 = fn->rr_ptr; if (!rt0) @@ -430,15 +432,17 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) } RT6_TRACE("%s() => %p\n", - __FUNCTION__, match); + __func__, match); - return (match ? match : &ip6_null_entry); + net = dev_net(rt0->rt6i_dev); + return (match ? match : net->ipv6.ip6_null_entry); } #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { + struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -486,7 +490,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -494,7 +499,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | @@ -513,9 +518,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(saddr) \ +#define BACKTRACK(__net, saddr) \ do { \ - if (rt == &ip6_null_entry) { \ + if (rt == __net->ipv6.ip6_null_entry) { \ struct fib6_node *pn; \ while (1) { \ if (fn->fn_flags & RTN_TL_ROOT) \ @@ -531,7 +536,8 @@ do { \ } \ } while(0) -static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -541,8 +547,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags); - BACKTRACK(&fl->fl6_src); + rt = rt6_device_match(net, rt, fl->oif, flags); + BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -550,8 +556,8 @@ out: } -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, + const struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { .oif = oif, @@ -569,7 +575,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -601,7 +607,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL); + struct nl_info info = { + .nl_net = dev_net(rt->rt6i_dev), + }; + return __ip6_ins_rt(rt, &info); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -655,8 +664,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, - struct flowi *fl, int flags) +static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -675,8 +684,9 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || + + BACKTRACK(net, &fl->fl6_src); + if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -694,7 +704,7 @@ restart: } dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; + rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->u.dst); if (nrt) { @@ -727,15 +737,16 @@ out2: return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -753,16 +764,17 @@ void ip6_route_input(struct sk_buff *skb) if (rt6_need_strict(&iph->daddr)) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); + skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); } -static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->oif, fl, flags); } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, + struct flowi *fl) { int flags = 0; @@ -771,18 +783,21 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } - return fib6_rule_lookup(fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); -static int ip6_blackhole_output(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) { struct rt6_info *ort = (struct rt6_info *) *dstp; @@ -795,8 +810,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl atomic_set(&new->__refcnt, 1); new->__use = 1; - new->input = ip6_blackhole_output; - new->output = ip6_blackhole_output; + new->input = dst_discard; + new->output = dst_discard; memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); new->dev = ort->u.dst.dev; @@ -887,12 +902,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(unsigned int mtu) +static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < ip6_rt_min_advmss) - mtu = ip6_rt_min_advmss; + if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) + mtu = net->ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -905,21 +920,21 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } -static struct dst_entry *ndisc_dst_gc_list; -static DEFINE_SPINLOCK(ndisc_lock); +static struct dst_entry *icmp6_dst_gc_list; +static DEFINE_SPINLOCK(icmp6_dst_lock); -struct dst_entry *ndisc_dst_alloc(struct net_device *dev, +struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - struct in6_addr *addr, - int (*output)(struct sk_buff *)) + const struct in6_addr *addr) { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); + struct net *net = dev_net(dev); if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -937,8 +952,8 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); - rt->u.dst.output = output; + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); + rt->u.dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST @@ -948,18 +963,18 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - spin_lock_bh(&ndisc_lock); - rt->u.dst.next = ndisc_dst_gc_list; - ndisc_dst_gc_list = &rt->u.dst; - spin_unlock_bh(&ndisc_lock); + spin_lock_bh(&icmp6_dst_lock); + rt->u.dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->u.dst; + spin_unlock_bh(&icmp6_dst_lock); - fib6_force_start_gc(); + fib6_force_start_gc(net); out: return &rt->u.dst; } -int ndisc_dst_gc(int *more) +int icmp6_dst_gc(int *more) { struct dst_entry *dst, *next, **pprev; int freed; @@ -967,8 +982,8 @@ int ndisc_dst_gc(int *more) next = NULL; freed = 0; - spin_lock_bh(&ndisc_lock); - pprev = &ndisc_dst_gc_list; + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { @@ -981,30 +996,33 @@ int ndisc_dst_gc(int *more) } } - spin_unlock_bh(&ndisc_lock); + spin_unlock_bh(&icmp6_dst_lock); return freed; } -static int ip6_dst_gc(void) +static int ip6_dst_gc(struct dst_ops *ops) { - static unsigned expire = 30*HZ; - static unsigned long last_gc; unsigned long now = jiffies; - - if (time_after(last_gc + ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) + struct net *net = ops->dst_net; + int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; + int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; + int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; + int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; + unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + + if (time_after(rt_last_gc + rt_min_interval, now) && + atomic_read(&ops->entries) <= rt_max_size) goto out; - expire++; - fib6_run_gc(expire); - last_gc = now; - if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = ip6_rt_gc_timeout>>1; - + net->ipv6.ip6_rt_gc_expire++; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + net->ipv6.ip6_rt_last_gc = now; + if (atomic_read(&ops->entries) < ops->gc_thresh) + net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: - expire -= expire>>ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); + net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; + return (atomic_read(&ops->entries) > rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1026,15 +1044,17 @@ static int ipv6_get_mtu(struct net_device *dev) return mtu; } -int ipv6_get_hoplimit(struct net_device *dev) +int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = ipv6_devconf.hop_limit; - struct inet6_dev *idev; - - idev = in6_dev_get(dev); - if (idev) { - hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); + int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hoplimit < 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } else + hoplimit = ipv6_devconf.hop_limit; } return hoplimit; } @@ -1046,6 +1066,7 @@ int ipv6_get_hoplimit(struct net_device *dev) int ip6_route_add(struct fib6_config *cfg) { int err; + struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -1060,7 +1081,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(&init_net, cfg->fc_ifindex); + dev = dev_get_by_index(net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1071,13 +1092,13 @@ int ip6_route_add(struct fib6_config *cfg) if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(cfg->fc_table); + table = fib6_new_table(net, cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; } - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1118,12 +1139,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != init_net.loopback_dev) { + if (dev != net->loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = init_net.loopback_dev; + dev = net->loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1160,7 +1181,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1227,10 +1248,13 @@ install_route: if (!rt->u.dst.metrics[RTAX_MTU-1]) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; + + cfg->fc_nlinfo.nl_net = dev_net(dev); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: @@ -1247,8 +1271,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; + struct net *net = dev_net(rt->rt6i_dev); - if (rt == &ip6_null_entry) + if (rt == net->ipv6.ip6_null_entry) return -ENOENT; table = rt->rt6i_table; @@ -1264,7 +1289,10 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL); + struct nl_info info = { + .nl_net = dev_net(rt->rt6i_dev), + }; + return __ip6_del_rt(rt, &info); } static int ip6_route_del(struct fib6_config *cfg) @@ -1274,7 +1302,7 @@ static int ip6_route_del(struct fib6_config *cfg) struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(cfg->fc_table); + table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); if (table == NULL) return err; @@ -1314,7 +1342,8 @@ struct ip6rd_flowi { struct in6_addr gateway; }; -static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, +static struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { @@ -1357,8 +1386,8 @@ restart: } if (!rt) - rt = &ip6_null_entry; - BACKTRACK(&fl->fl6_src); + rt = net->ipv6.ip6_null_entry; + BACKTRACK(net, &fl->fl6_src); out: dst_hold(&rt->u.dst); @@ -1373,6 +1402,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; + struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1389,7 +1419,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); + return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + flags, __ip6_route_redirect); } void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, @@ -1398,10 +1429,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; + struct net *net = dev_net(neigh->dev); rt = ip6_route_redirect(dest, src, saddr, neigh->dev); - if (rt == &ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); @@ -1446,7 +1478,8 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) goto out; @@ -1474,9 +1507,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; + struct net *net = dev_net(dev); int allfrag = 0; - rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); if (rt == NULL) return; @@ -1509,7 +1543,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1535,7 +1569,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -1550,7 +1584,8 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(ort->rt6i_dev); + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1581,14 +1616,15 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_INFO); + table = fib6_get_table(net, RT6_TABLE_INFO); if (table == NULL) return NULL; @@ -1612,17 +1648,21 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref) { struct fib6_config cfg = { .fc_table = RT6_TABLE_INFO, - .fc_metric = 1024, + .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = net, }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1634,7 +1674,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle ip6_route_add(&cfg); - return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); } #endif @@ -1643,7 +1683,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1666,10 +1706,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, { struct fib6_config cfg = { .fc_table = RT6_TABLE_DFLT, - .fc_metric = 1024, + .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1679,13 +1722,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(void) +void rt6_purge_dflt_routers(struct net *net) { struct rt6_info *rt; struct fib6_table *table; /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(net, RT6_TABLE_DFLT); if (table == NULL) return; @@ -1702,7 +1745,8 @@ restart: read_unlock_bh(&table->tb6_lock); } -static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, +static void rtmsg_to_fib6_config(struct net *net, + struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { memset(cfg, 0, sizeof(*cfg)); @@ -1715,12 +1759,14 @@ static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_nlinfo.nl_net = net; + ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); } -int ipv6_route_ioctl(unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib6_config cfg; struct in6_rtmsg rtmsg; @@ -1736,7 +1782,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) if (err) return -EFAULT; - rtmsg_to_fib6_config(&rtmsg, &cfg); + rtmsg_to_fib6_config(net, &rtmsg, &cfg); rtnl_lock(); switch (cmd) { @@ -1761,8 +1807,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static inline int ip6_pkt_drop(struct sk_buff *skb, int code, - int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; switch (ipstats_mib_noroutes) { @@ -1806,12 +1851,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -static int ip6_pkt_blk_hole(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - #endif /* @@ -1822,21 +1861,22 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(idev->dev); + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(init_net.loopback_dev); + dev_hold(net->loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = init_net.loopback_dev; + rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; rt->u.dst.obsolete = -1; @@ -1853,26 +1893,39 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); return rt; } +struct arg_dev_net { + struct net_device *dev; + struct net *net; +}; + static int fib6_ifdown(struct rt6_info *rt, void *arg) { - if (((void*)rt->rt6i_dev == arg || arg == NULL) && - rt != &ip6_null_entry) { + struct net_device *dev = ((struct arg_dev_net *)arg)->dev; + struct net *net = ((struct arg_dev_net *)arg)->net; + + if (((void *)rt->rt6i_dev == dev || dev == NULL) && + rt != net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } return 0; } -void rt6_ifdown(struct net_device *dev) +void rt6_ifdown(struct net *net, struct net_device *dev) { - fib6_clean_all(fib6_ifdown, 0, dev); + struct arg_dev_net adn = { + .dev = dev, + .net = net, + }; + + fib6_clean_all(net, fib6_ifdown, 0, &adn); } struct rt6_mtu_change_arg @@ -1885,6 +1938,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; + struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1912,11 +1966,11 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) */ if (rt->rt6i_dev == arg->dev && !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) > arg->mtu || + (dst_mtu(&rt->u.dst) >= arg->mtu || (dst_mtu(&rt->u.dst) < arg->mtu && dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -1928,7 +1982,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -1965,6 +2019,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2050,7 +2105,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, - int prefix, unsigned int flags) + int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -2110,11 +2165,27 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } else if (rtm->rtm_src_len) NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif - if (iif) - NLA_PUT_U32(skb, RTA_IIF, iif); - else if (dst) { + if (iif) { +#ifdef CONFIG_IPV6_MROUTE + if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + int err = ip6mr_get_route(skb, rtm, nowait); + if (err <= 0) { + if (!nowait) { + if (err == 0) + return 0; + goto nla_put_failure; + } else { + if (err == -EMSGSIZE) + goto nla_put_failure; + } + } + } else +#endif + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2154,11 +2225,12 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, - prefix, NLM_F_MULTI); + prefix, 0, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2195,7 +2267,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(&init_net, iif); + dev = __dev_get_by_index(net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2214,18 +2286,18 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); skb->dst = &rt->u.dst; err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2233,32 +2305,50 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; - int err = -ENOBUFS; + struct net *net = info->nl_net; + u32 seq; + int err; - if (info) { - pid = info->pid; - nlh = info->nlh; - if (nlh) - seq = nlh->nlmsg_seq; - } + err = -ENOBUFS; + seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); + err = rt6_fill_node(skb, rt, NULL, NULL, 0, + event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); +} + +static int ip6_route_dev_notify(struct notifier_block *this, + unsigned long event, void *data) +{ + struct net_device *dev = (struct net_device *)data; + struct net *net = dev_net(dev); + + if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); + net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } + + return NOTIFY_OK; } /* @@ -2307,13 +2397,33 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int ipv6_route_show(struct seq_file *m, void *v) { - fib6_clean_all(rt6_info_route, 0, m); + struct net *net = (struct net *)m->private; + fib6_clean_all(net, rt6_info_route, 0, m); return 0; } static int ipv6_route_open(struct inode *inode, struct file *file) { - return single_open(file, ipv6_route_show, NULL); + int err; + struct net *net = get_proc_net(inode); + if (!net) + return -ENXIO; + + err = single_open(file, ipv6_route_show, net); + if (err < 0) { + put_net(net); + return err; + } + + return 0; +} + +static int ipv6_route_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return single_release(inode, file); } static const struct file_operations ipv6_route_proc_fops = { @@ -2321,24 +2431,46 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = ipv6_route_release, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) { + struct net *net = (struct net *)seq->private; seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", - rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, - rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, - rt6_stats.fib_rt_cache, - atomic_read(&ip6_dst_ops.entries), - rt6_stats.fib_discarded_routes); + net->ipv6.rt6_stats->fib_nodes, + net->ipv6.rt6_stats->fib_route_nodes, + net->ipv6.rt6_stats->fib_rt_alloc, + net->ipv6.rt6_stats->fib_rt_entries, + net->ipv6.rt6_stats->fib_rt_cache, + atomic_read(&net->ipv6.ip6_dst_ops->entries), + net->ipv6.rt6_stats->fib_discarded_routes); return 0; } static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, rt6_stats_seq_show, NULL); + int err; + struct net *net = get_proc_net(inode); + if (!net) + return -ENXIO; + + err = single_open(file, rt6_stats_seq_show, net); + if (err < 0) { + put_net(net); + return err; + } + + return 0; +} + +static int rt6_stats_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = (struct net *)seq->private; + put_net(net); + return single_release(inode, file); } static const struct file_operations rt6_stats_seq_fops = { @@ -2346,30 +2478,30 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = rt6_stats_seq_release, }; #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL -static int flush_delay; - static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = current->nsproxy->net_ns; + int delay = net->ipv6.sysctl.flush_delay; if (write) { proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else return -EINVAL; } -ctl_table ipv6_route_table[] = { +ctl_table ipv6_route_table_template[] = { { .procname = "flush", - .data = &flush_delay, + .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, .proc_handler = &ipv6_sysctl_rtcache_flush @@ -2377,7 +2509,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", - .data = &ip6_dst_ops.gc_thresh, + .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2385,7 +2517,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", - .data = &ip6_rt_max_size, + .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2393,7 +2525,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2402,7 +2534,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", - .data = &ip6_rt_gc_timeout, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2411,7 +2543,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", - .data = &ip6_rt_gc_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2420,7 +2552,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", - .data = &ip6_rt_gc_elasticity, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2429,7 +2561,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", - .data = &ip6_rt_mtu_expires, + .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2438,7 +2570,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", - .data = &ip6_rt_min_advmss, + .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2447,7 +2579,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_ms_jiffies, @@ -2456,43 +2588,186 @@ ctl_table ipv6_route_table[] = { { .ctl_name = 0 } }; +struct ctl_table *ipv6_route_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(ipv6_route_table_template, + sizeof(ipv6_route_table_template), + GFP_KERNEL); + + if (table) { + table[0].data = &net->ipv6.sysctl.flush_delay; + table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; + table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + } + + return table; +} #endif -void __init ip6_route_init(void) +static int ip6_route_net_init(struct net *net) { - ip6_dst_ops.kmem_cachep = - kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; - - fib6_init(); - proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#ifdef CONFIG_XFRM - xfrm6_init(); + int ret = -ENOMEM; + + net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, + sizeof(*net->ipv6.ip6_dst_ops), + GFP_KERNEL); + if (!net->ipv6.ip6_dst_ops) + goto out; + net->ipv6.ip6_dst_ops->dst_net = hold_net(net); + + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, + sizeof(*net->ipv6.ip6_null_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_null_entry) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_null_entry; + net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, + sizeof(*net->ipv6.ip6_prohibit_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_prohibit_entry) { + kfree(net->ipv6.ip6_null_entry); + goto out; + } + net->ipv6.ip6_prohibit_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_prohibit_entry; + net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + + net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, + sizeof(*net->ipv6.ip6_blk_hole_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_blk_hole_entry) { + kfree(net->ipv6.ip6_null_entry); + kfree(net->ipv6.ip6_prohibit_entry); + goto out; + } + net->ipv6.ip6_blk_hole_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; #endif + +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + net->ipv6.ip6_rt_gc_expire = 30*HZ; + + ret = 0; +out: + return ret; + +out_ip6_dst_ops: + release_net(net->ipv6.ip6_dst_ops->dst_net); + kfree(net->ipv6.ip6_dst_ops); + goto out; +} + +static void ip6_route_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif + kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - fib6_rules_init(); + kfree(net->ipv6.ip6_prohibit_entry); + kfree(net->ipv6.ip6_blk_hole_entry); #endif + release_net(net->ipv6.ip6_dst_ops->dst_net); + kfree(net->ipv6.ip6_dst_ops); +} + +static struct pernet_operations ip6_route_net_ops = { + .init = ip6_route_net_init, + .exit = ip6_route_net_exit, +}; + +static struct notifier_block ip6_route_dev_notifier = { + .notifier_call = ip6_route_dev_notify, + .priority = 0, +}; + +int __init ip6_route_init(void) +{ + int ret; + + ret = -ENOMEM; + ip6_dst_ops_template.kmem_cachep = + kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!ip6_dst_ops_template.kmem_cachep) + goto out;; + + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_kmem_cache; + + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif + ret = fib6_init(); + if (ret) + goto out_register_subsys; + + ret = xfrm6_init(); + if (ret) + goto out_fib6_init; + + ret = fib6_rules_init(); + if (ret) + goto xfrm6_init; + + ret = -ENOBUFS; + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) + goto fib6_rules_init; + + ret = register_netdevice_notifier(&ip6_route_dev_notifier); + if (ret) + goto fib6_rules_init; + +out: + return ret; - __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); +fib6_rules_init: + fib6_rules_cleanup(); +xfrm6_init: + xfrm6_fini(); +out_fib6_init: + fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); +out_kmem_cache: + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); + goto out; } void ip6_route_cleanup(void) { -#ifdef CONFIG_IPV6_MULTIPLE_TABLES + unregister_netdevice_notifier(&ip6_route_dev_notifier); fib6_rules_cleanup(); -#endif -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ipv6_route"); - proc_net_remove(&init_net, "rt6_stats"); -#endif -#ifdef CONFIG_XFRM xfrm6_fini(); -#endif - rt6_ifdown(NULL); fib6_gc_cleanup(); - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + unregister_pernet_subsys(&ip6_route_net_ops); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); }