static void rt_worker_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
/*
* Interface to generic destination cache.
static struct rt_hash_bucket *rt_hash_table __read_mostly;
static unsigned rt_hash_mask __read_mostly;
static unsigned int rt_hash_log __read_mostly;
-static atomic_t rt_genid __read_mostly;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
(__raw_get_cpu_var(rt_cache_stat).field++)
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+ int genid)
{
return jhash_3words((__force u32)(__be32)(daddr),
(__force u32)(__be32)(saddr),
- idx, atomic_read(&rt_genid))
+ idx, genid)
& rt_hash_mask;
}
+static inline int rt_genid(struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid);
+}
+
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
struct seq_net_private p;
struct rt_cache_iter_state *st = seq->private;
if (*pos)
return rt_cache_get_idx(seq, *pos - 1);
- st->genid = atomic_read(&rt_genid);
+ st->genid = rt_genid(seq_file_net(seq));
return SEQ_START_TOKEN;
}
return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
}
+static inline int rt_is_expired(struct rtable *rth)
+{
+ return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
/*
* Perform a full scan of hash table and free all entries.
* Can be called by a softirq or a process.
{
unsigned int i;
struct rtable *rth, *next;
+ struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
continue;
spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+ {
+ struct rtable ** prev, * p;
+
+ rth = rt_hash_table[i].chain;
+
+ /* defer releasing the head of the list after spin_unlock */
+ for (tail = rth; tail; tail = tail->u.dst.rt_next)
+ if (!rt_is_expired(tail))
+ break;
+ if (rth != tail)
+ rt_hash_table[i].chain = tail;
+
+ /* call rt_free on entries after the tail requiring flush */
+ prev = &rt_hash_table[i].chain;
+ for (p = *prev; p; p = next) {
+ next = p->u.dst.rt_next;
+ if (!rt_is_expired(p)) {
+ prev = &p->u.dst.rt_next;
+ } else {
+ *prev = next;
+ rt_free(p);
+ }
+ }
+ }
+#else
rth = rt_hash_table[i].chain;
rt_hash_table[i].chain = NULL;
+ tail = NULL;
+#endif
spin_unlock_bh(rt_hash_lock_addr(i));
- for (; rth; rth = next) {
+ for (; rth != tail; rth = next) {
next = rth->u.dst.rt_next;
rt_free(rth);
}
continue;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
* many times (2^24) without giving recent rt_genid.
* Jenkins hash is strong enough that litle changes of rt_genid are OK.
*/
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
{
unsigned char shuffle;
get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &rt_genid);
+ atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
/*
*/
void rt_cache_flush(struct net *net, int delay)
{
- rt_cache_invalidate();
+ rt_cache_invalidate(net);
if (delay >= 0)
rt_do_flush(!in_softirq());
}
/*
* We change rt_genid and let gc do the cleanup
*/
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
{
- rt_cache_invalidate();
- mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+ struct net *net = (struct net *)__net;
+ rt_cache_invalidate(net);
+ mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
}
/*
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid == atomic_read(&rt_genid) &&
+ if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
while ((aux = *rthp) != NULL) {
- if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+ if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->u.dst.rt_next;
rt_free(aux);
continue;
for (i = 0; i < 2; i++) {
for (k = 0; k < 2; k++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rthp=&rt_hash_table[hash].chain;
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
rth->fl.iif != 0 ||
- rth->rt_genid != atomic_read(&rt_genid) ||
+ rt_is_expired(rth) ||
!net_eq(dev_net(rth->u.dst.dev), net)) {
rthp = &rth->u.dst.rt_next;
continue;
rt->u.dst.neighbour = NULL;
rt->u.dst.hh = NULL;
rt->u.dst.xfrm = NULL;
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags |= RTCF_REDIRECTED;
/* Gateway is different ... */
} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
rt->u.dst.expires) {
unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
- rt->fl.oif);
+ rt->fl.oif,
+ rt_genid(dev_net(dst->dev)));
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
NIPQUAD_FMT "/%02x dropped\n",
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
- IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+ IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
code = ICMP_PKT_FILTERED;
for (k = 0; k < 2; k++) {
for (i = 0; i < 2; i++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
rth->fl.iif != 0 ||
dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
!net_eq(dev_net(rth->u.dst.dev), net) ||
- rth->rt_genid != atomic_read(&rt_genid))
+ !rt_is_expired(rth))
continue;
if (new_mtu < 68 || new_mtu >= old_mtu) {
rth->fl.oif = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
if (our) {
RT_CACHE_STAT_INC(in_slow_mc);
in_dev_put(in_dev);
- hash = rt_hash(daddr, saddr, dev->ifindex);
+ hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
e_nobufs:
rth->u.dst.input = ip_forward;
rth->u.dst.output = ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
rt_set_nexthop(rth, res, itag);
return err;
/* put it into the cache */
- hash = rt_hash(daddr, saddr, fl->iif);
+ hash = rt_hash(daddr, saddr, fl->iif,
+ rt_genid(dev_net(rth->u.dst.dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
}
goto e_nobufs;
rth->u.dst.output= ip_rt_bug;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(net);
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
rth->rt_flags &= ~RTCF_LOCAL;
}
rth->rt_type = res.type;
- hash = rt_hash(daddr, saddr, fl.iif);
+ hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
err = rt_intern_hash(hash, rth, &skb->rtable);
goto done;
net = dev_net(dev);
tos &= IPTOS_RT_MASK;
- hash = rt_hash(daddr, saddr, iif);
+ hash = rt_hash(daddr, saddr, iif, rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
(rth->fl.fl4_tos ^ tos)) == 0 &&
rth->fl.mark == skb->mark &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
rth->rt_spec_dst= fl->fl4_src;
rth->u.dst.output=ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev_out));
RT_CACHE_STAT_INC(out_slow_tot);
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
unsigned hash;
if (err == 0) {
- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+ hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+ rt_genid(dev_net(dev_out)));
err = rt_intern_hash(hash, rth, rp);
}
unsigned hash;
struct rtable *rth;
- hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+ hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
rcu_read_lock_bh();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
};
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
{
struct rtable *ort = *rp;
struct rtable *rt = (struct rtable *)
rt->idev = ort->idev;
if (rt->idev)
in_dev_hold(rt->idev);
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
flags ? XFRM_LOOKUP_WAIT : 0);
if (err == -EREMOTE)
- err = ipv4_dst_blackhole(rp, flp);
+ err = ipv4_dst_blackhole(net, rp, flp);
return err;
}
rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
continue;
- if (rt->rt_genid != atomic_read(&rt_genid))
+ if (rt_is_expired(rt))
continue;
skb->dst = dst_clone(&rt->u.dst);
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
}
#ifdef CONFIG_SYSCTL
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
if (write) {
int flush_delay;
- static DEFINE_MUTEX(flush_mutex);
+ ctl_table ctl;
+ struct net *net;
- mutex_lock(&flush_mutex);
- ctl->data = &flush_delay;
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- ctl->data = NULL;
- mutex_unlock(&flush_mutex);
+ memcpy(&ctl, __ctl, sizeof(ctl));
+ ctl.data = &flush_delay;
+ proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
- rt_cache_flush(&init_net, flush_delay);
+ net = (struct net *)__ctl->extra1;
+ rt_cache_flush(net, flush_delay);
return 0;
}
size_t newlen)
{
int delay;
+ struct net *net;
if (newlen != sizeof(int))
return -EINVAL;
if (get_user(delay, (int __user *)newval))
return -EFAULT;
- rt_cache_flush(&init_net, delay);
+ net = (struct net *)table->extra1;
+ rt_cache_flush(net, delay);
return 0;
}
ctl_table ipv4_route_table[] = {
- {
- .ctl_name = NET_IPV4_ROUTE_FLUSH,
- .procname = "flush",
- .maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = &ipv4_sysctl_rtcache_flush,
- .strategy = &ipv4_sysctl_rtcache_flush_strategy,
- },
{
.ctl_name = NET_IPV4_ROUTE_GC_THRESH,
.procname = "gc_thresh",
},
{ .ctl_name = 0 }
};
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "ipv4", .ctl_name = NET_IPV4, },
+ { .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+ { },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+ {
+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
+ .procname = "flush",
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = &ipv4_sysctl_rtcache_flush,
+ .strategy = &ipv4_sysctl_rtcache_flush_strategy,
+ },
+ { .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = ipv4_route_flush_table;
+ if (net != &init_net) {
+ tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ }
+ tbl[0].extra1 = net;
+
+ net->ipv4.route_hdr =
+ register_net_sysctl_table(net, ipv4_route_path, tbl);
+ if (net->ipv4.route_hdr == NULL)
+ goto err_reg;
+ return 0;
+
+err_reg:
+ if (tbl != ipv4_route_flush_table)
+ kfree(tbl);
+err_dup:
+ return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = net->ipv4.route_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.route_hdr);
+ BUG_ON(tbl == ipv4_route_flush_table);
+ kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+ .init = sysctl_route_net_init,
+ .exit = sysctl_route_net_exit,
+};
#endif
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+ atomic_set(&net->ipv4.rt_genid,
+ (int) ((num_physpages ^ (num_physpages>>8)) ^
+ (jiffies ^ (jiffies >> 7))));
+
+ net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+ net->ipv4.rt_secret_timer.data = (unsigned long)net;
+ init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+ net->ipv4.rt_secret_timer.expires =
+ jiffies + net_random() % ip_rt_secret_interval +
+ ip_rt_secret_interval;
+ add_timer(&net->ipv4.rt_secret_timer);
+ return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+ del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+ .init = rt_secret_timer_init,
+ .exit = rt_secret_timer_exit,
+};
+
+
#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct __read_mostly;
#endif /* CONFIG_NET_CLS_ROUTE */
{
int rc = 0;
- atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
#ifdef CONFIG_NET_CLS_ROUTE
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
if (!ip_rt_acct)
devinet_init();
ip_fib_init();
- rt_secret_timer.function = rt_secret_rebuild;
- rt_secret_timer.data = 0;
- init_timer_deferrable(&rt_secret_timer);
-
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&rt_secret_timer);
+ if (register_pernet_subsys(&rt_secret_timer_ops))
+ printk(KERN_ERR "Unable to setup rt_secret_timer\n");
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#endif
rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&sysctl_route_ops);
+#endif
return rc;
}