* routers in REACHABLE, STALE, DELAY or PROBE states).
* - always select the same router if it is (probably)
* reachable. otherwise, round-robin the list.
+ * Ville Nuorvala
+ * Fixed routing subtrees.
*/
#include <linux/capability.h>
#define CLONE_OFFLINK_ROUTE 0
-#define RT6_SELECT_F_IFACE 0x1
-#define RT6_SELECT_F_REACHABLE 0x2
-
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int m, n;
m = rt6_check_dev(rt, oif);
- if (!m && (strict & RT6_SELECT_F_IFACE))
+ if (!m && (strict & RT6_LOOKUP_F_IFACE))
return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
n = rt6_check_neigh(rt);
if (n > 1)
m |= 16;
- else if (!n && strict & RT6_SELECT_F_REACHABLE)
+ else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
return -1;
return m;
}
}
if (!match &&
- (strict & RT6_SELECT_F_REACHABLE) &&
+ (strict & RT6_LOOKUP_F_REACHABLE) &&
last && last != rt0) {
/* no entries matched; do round-robin */
static DEFINE_SPINLOCK(lock);
}
#endif
-#define BACKTRACK() \
-if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
- while ((fn = fn->parent) != NULL) { \
- if (fn->fn_flags & RTN_TL_ROOT) { \
- dst_hold(&rt->u.dst); \
- goto out; \
+#define BACKTRACK(saddr) /* expands in place: uses the caller's rt and fn and its out/restart labels */ \
+do { \
+ if (rt == &ip6_null_entry) { /* backtrack only when the lookup ended on the null entry */ \
+ struct fib6_node *pn; \
+ while (fn) { \
+ if (fn->fn_flags & RTN_TL_ROOT) /* node flagged RTN_TL_ROOT: stop; rt is still the null entry */ \
+ goto out; \
+ pn = fn->parent; \
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) /* parent has a subtree other than fn: search it for saddr */ \
+ fn = fib6_lookup(pn->subtree, NULL, saddr); /* NOTE(review): the test goes through FIB6_SUBTREE(pn) but the call reads pn->subtree directly - confirm both are meant */ \
+ else \
+ fn = pn; /* otherwise step up to the parent */ \
+ if (fn->fn_flags & RTN_RTINFO) /* NOTE(review): fn is dereferenced before while (fn) rechecks it - confirm fib6_lookup() cannot return NULL here */ \
+ goto restart; \
} \
- if (fn->fn_flags & RTN_RTINFO) \
- goto restart; \
} \
-}
+} while(0)
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
struct flowi *fl, int flags)
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
- rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
- BACKTRACK();
+ rt = rt6_device_match(rt, fl->oif, flags);
+ BACKTRACK(&fl->fl6_src);
dst_hold(&rt->u.dst);
out:
read_unlock_bh(&table->tb6_lock);
},
};
struct dst_entry *dst;
- int flags = strict ? RT6_F_STRICT : 0;
+ int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = RT6_LOOKUP_F_REACHABLE;
- if (flags & RT6_F_STRICT)
- strict = RT6_SELECT_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart:
rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
- BACKTRACK();
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
.ip6_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+ .fwmark = skb->nfmark,
+#endif
.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
},
},
.proto = iph->nexthdr,
};
- int flags = 0;
-
- if (rt6_need_strict(&iph->daddr))
- flags |= RT6_F_STRICT;
+ int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
}
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = RT6_LOOKUP_F_REACHABLE;
- if (flags & RT6_F_STRICT)
- strict = RT6_SELECT_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
- BACKTRACK();
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
int flags = 0;
if (rt6_need_strict(&fl->fl6_dst))
- flags |= RT6_F_STRICT;
+ flags |= RT6_LOOKUP_F_IFACE;
return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
/*
* Handle redirects
*/
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi { /* redirect lookup key: a flowi followed by the gateway address to match */
+ struct flowi fl; /* must stay the first member: the struct is cast to and from struct flowi * */
+ struct in6_addr gateway; /* compared with each candidate route's rt6i_gateway in __ip6_route_redirect */
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+ struct flowi *fl,
+ int flags)
{
- struct rt6_info *rt, *nrt = NULL;
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+ struct rt6_info *rt;
struct fib6_node *fn;
- struct fib6_table *table;
- struct netevent_redirect netevent;
-
- /* TODO: Very lazy, might need to check all tables */
- table = fib6_get_table(RT6_TABLE_MAIN);
- if (table == NULL)
- return;
/*
* Get the "current" route for this destination and
*/
read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, dest, NULL);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
for (rt = fn->leaf; rt; rt = rt->u.next) {
/*
continue;
if (!(rt->rt6i_flags & RTF_GATEWAY))
continue;
- if (neigh->dev != rt->rt6i_dev)
+ if (fl->oif != rt->rt6i_dev->ifindex)
continue;
- if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
continue;
break;
}
- if (rt)
- dst_hold(&rt->u.dst);
- else if (rt6_need_strict(dest)) {
- while ((fn = fn->parent) != NULL) {
- if (fn->fn_flags & RTN_ROOT)
- break;
- if (fn->fn_flags & RTN_RTINFO)
- goto restart;
- }
- }
+
+ if (!rt)
+ rt = &ip6_null_entry;
+ BACKTRACK(&fl->fl6_src);
+out:
+ dst_hold(&rt->u.dst);
+
read_unlock_bh(&table->tb6_lock);
- if (!rt) {
+ return rt;
+};
+/*
+ * Build a redirect lookup key from dest, src, gateway and dev->ifindex
+ * and resolve it with fib6_rule_lookup(), passing __ip6_route_redirect
+ * as the per-table lookup.  RT6_LOOKUP_F_IFACE is requested when
+ * rt6_need_strict(dest) is non-zero.  The lookup result is returned
+ * cast to struct rt6_info *.
+ */
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+ struct in6_addr *src,
+ struct in6_addr *gateway,
+ struct net_device *dev)
+{
+ struct ip6rd_flowi rdfl = {
+ .fl = {
+ .oif = dev->ifindex, /* __ip6_route_redirect compares this with rt6i_dev->ifindex */
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *dest,
+ .saddr = *src,
+ },
+ },
+ },
+ .gateway = *gateway, /* addresses are copied by value into the key */
+ };
+ int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
+
+ return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); /* key is passed as its leading struct flowi; the callback casts it back */
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+ struct in6_addr *saddr,
+ struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+ struct rt6_info *rt, *nrt = NULL;
+ struct netevent_redirect netevent;
+
+ rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+ if (rt == &ip6_null_entry) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
"for redirect target\n");
- return;
+ goto out;
}
/*
}
static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
- [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
+ [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, /* presumably the expected payload size of an IPv6 address; .len semantics are not visible here - TODO confirm */
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 }, /* read with nla_get_u32() in inet6_rtm_getroute */
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
};
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtattr **rta = arg;
- int iif = 0;
- int err = -ENOBUFS;
+ struct nlattr *tb[RTA_MAX+1]; /* parsed attributes, indexed by RTA_* type */
+ struct rt6_info *rt;
struct sk_buff *skb;
+ struct rtmsg *rtm; /* only used for sizeof(*rtm) in the nlmsg_parse() call */
struct flowi fl;
- struct rt6_info *rt;
+ int err, iif = 0; /*
+  * Flow: parse the request attributes, fill a flowi from
+  * RTA_SRC/RTA_DST/RTA_OIF, resolve it with ip6_route_output(),
+  * encode the route with rt6_fill_node() and send the reply with
+  * rtnl_unicast() to NETLINK_CB(in_skb).pid.  iif stays 0 when
+  * RTA_IIF is absent.
+  */
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (skb == NULL)
- goto out;
-
- /* Reserve room for dummy headers, this skb can pass
- through good chunk of routing engine.
- */
- skb->mac.raw = skb->data;
- skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); /* fills tb[], checked against rtm_ipv6_policy */
+ if (err < 0)
+ goto errout;
+ err = -EINVAL; /* returned by the RTA_SRC/RTA_DST length checks below */
memset(&fl, 0, sizeof(fl));
- if (rta[RTA_SRC-1])
- ipv6_addr_copy(&fl.fl6_src,
- (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
- if (rta[RTA_DST-1])
- ipv6_addr_copy(&fl.fl6_dst,
- (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
- if (rta[RTA_IIF-1])
- memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+ if (tb[RTA_SRC]) {
+ if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) /* RTA_SRC has no rtm_ipv6_policy entry, so its length is checked here */
+ goto errout;
+
+ ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+ }
+
+ if (tb[RTA_DST]) {
+ if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) /* same manual length check for RTA_DST */
+ goto errout;
+
+ ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+ }
+
+ if (tb[RTA_IIF])
+ iif = nla_get_u32(tb[RTA_IIF]);
+
+ if (tb[RTA_OIF])
+ fl.oif = nla_get_u32(tb[RTA_OIF]); /* fl was zeroed by memset, so oif is 0 when RTA_OIF is absent */
if (iif) {
struct net_device *dev;
dev = __dev_get_by_index(iif);
if (!dev) {
err = -ENODEV;
- goto out_free;
+ goto errout; /* no skb has been allocated yet, so there is nothing to free */
}
}
- fl.oif = 0;
- if (rta[RTA_OIF-1])
- memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); /* reply buffer, allocated only after the request has been validated */
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
- rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+ /* Reserve room for dummy headers, this skb can pass
+ through good chunk of routing engine.
+ */
+ skb->mac.raw = skb->data;
+ skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
skb->dst = &rt->u.dst;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
- err = rt6_fill_node(skb, rt,
- &fl.fl6_dst, &fl.fl6_src,
- iif,
+ err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, 0);
if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
+ kfree_skb(skb); /* reply was never sent, so it is freed here */
+ goto errout; /* rt6_fill_node()'s own error code is returned unchanged */
}
err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-out:
+errout: /* single exit: err holds the result of the last step reached */
return err;
-out_free:
- kfree_skb(skb);
- goto out;
}
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
struct proc_dir_entry *p;
- ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
- sizeof(struct rt6_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!ip6_dst_ops.kmem_cachep)
- panic("cannot create ip6_dst_cache");
-
+ ip6_dst_ops.kmem_cachep =
+ kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
fib6_init();
#ifdef CONFIG_PROC_FS
p = proc_net_create("ipv6_route", 0, rt6_proc_info);