/*
* Linux NET3: IP/IP protocol decoder.
*
- * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
- *
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
*
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+static int ipip_net_id; /* key handed to net_generic() to fetch this module's struct ipip_net */
+struct ipip_net { /* per-network-namespace IPIP state (replaces the former file-scope statics) */
+ struct ip_tunnel *tunnels_r_l[HASH_SIZE]; /* chains indexed by HASH(remote) ^ HASH(local) */
+ struct ip_tunnel *tunnels_r[HASH_SIZE]; /* chains indexed by HASH(remote) */
+ struct ip_tunnel *tunnels_l[HASH_SIZE]; /* chains indexed by HASH(local) */
+ struct ip_tunnel *tunnels_wc[1]; /* single wildcard slot; holds the fallback tunnel */
+ struct ip_tunnel **tunnels[4]; /* the four tables above by priority; filled in by ipip_init_net() */
+
+ struct net_device *fb_tunnel_dev; /* this namespace's fallback device, allocated as "tunl0" */
+};
+
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
-static struct net_device *ipip_fb_tunnel_dev;
-
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
-
static DEFINE_RWLOCK(ipip_lock);
-static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
+static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, /* net: namespace whose tables are searched */
+ __be32 remote, __be32 local) /* returns the first match whose device is IFF_UP, else NULL */
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
struct ip_tunnel *t;
+ struct ipip_net *ipn = net_generic(net, ipip_net_id); /* this namespace's tunnel tables */
- for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { /* 1st: both remote and local must match */
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_r[h0]; t; t = t->next) {
+ for (t = ipn->tunnels_r[h0]; t; t = t->next) { /* 2nd: remote matches */
if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_l[h1]; t; t = t->next) {
+ for (t = ipn->tunnels_l[h1]; t; t = t->next) { /* 3rd: local matches */
if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
return t;
}
- if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) /* last: wildcard slot */
return t;
return NULL;
}
-static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
+static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
+ struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
prio |= 1;
h ^= HASH(local);
}
- return &tunnels[prio][h];
+ return &ipn->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, /* ipn: namespace tables to index */
+ struct ip_tunnel *t) /* wrapper: bucket selected by t's own parms */
{
- return __ipip_bucket(&t->parms);
+ return __ipip_bucket(ipn, &t->parms);
}
-static void ipip_tunnel_unlink(struct ip_tunnel *t)
+static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
struct ip_tunnel **tp;
- for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
+ for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
write_lock_bh(&ipip_lock);
*tp = t->next;
}
}
-static void ipip_tunnel_link(struct ip_tunnel *t)
+static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipip_bucket(t);
+ struct ip_tunnel **tp = ipip_bucket(ipn, t);
t->next = *tp;
write_lock_bh(&ipip_lock);
write_unlock_bh(&ipip_lock);
}
-static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
+ struct ip_tunnel_parm *parms, int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
char name[IFNAMSIZ];
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
- for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
if (dev == NULL)
return NULL;
+ dev_net_set(dev, net);
+
+ if (strchr(name, '%')) {
+ if (dev_alloc_name(dev, name) < 0)
+ goto failed_free;
+ }
+
nt = netdev_priv(dev);
dev->init = ipip_tunnel_init;
nt->parms = *parms;
- if (register_netdevice(dev) < 0) {
- free_netdev(dev);
- goto failed;
- }
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
dev_hold(dev);
- ipip_tunnel_link(nt);
+ ipip_tunnel_link(ipn, nt);
return nt;
-failed:
+failed_free:
+ free_netdev(dev);
return NULL;
}
static void ipip_tunnel_uninit(struct net_device *dev)
{
- if (dev == ipip_fb_tunnel_dev) {
+ struct net *net = dev_net(dev); /* namespace recorded on the device */
+ struct ipip_net *ipn = net_generic(net, ipip_net_id); /* that namespace's tunnel tables */
+
+ if (dev == ipn->fb_tunnel_dev) { /* fallback device sits in the wildcard slot, not in a hash chain */
write_lock_bh(&ipip_lock);
- tunnels_wc[0] = NULL;
+ ipn->tunnels_wc[0] = NULL; /* cleared under the write lock; lookups run under read_lock(&ipip_lock) */
write_unlock_bh(&ipip_lock);
} else
- ipip_tunnel_unlink(netdev_priv(dev));
+ ipip_tunnel_unlink(ipn, netdev_priv(dev)); /* ordinary tunnel: remove it from its bucket */
dev_put(dev);
}
static int ipip_err(struct sk_buff *skb, u32 info)
{
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
*/
err = -ENOENT;
read_lock(&ipip_lock);
- t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+ t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL || t->parms.iph.daddr == 0)
goto out;
out:
read_unlock(&ipip_lock);
return err;
-#else
- struct iphdr *iph = (struct iphdr*)dp;
- int hlen = iph->ihl<<2;
- struct iphdr *eiph;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- int rel_type = 0;
- int rel_code = 0;
- __be32 rel_info = 0;
- __u32 n = 0;
- struct sk_buff *skb2;
- struct flowi fl;
- struct rtable *rt;
-
- if (len < hlen + sizeof(struct iphdr))
- return 0;
- eiph = (struct iphdr*)(dp + hlen);
-
- switch (type) {
- default:
- return 0;
- case ICMP_PARAMETERPROB:
- n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
- if (n < hlen)
- return 0;
-
- /* So... This guy found something strange INSIDE encapsulated
- packet. Well, he is fool, but what can we do ?
- */
- rel_type = ICMP_PARAMETERPROB;
- rel_info = htonl((n - hlen) << 24);
- break;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return 0;
- case ICMP_FRAG_NEEDED:
- /* And it is the only really necessary thing :-) */
- n = ntohs(icmp_hdr(skb)->un.frag.mtu);
- if (n < hlen+68)
- return 0;
- n -= hlen;
- /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (n > ntohs(eiph->tot_len))
- return 0;
- rel_info = htonl(n);
- break;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe, it is just ether pollution. --ANK
- */
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return 0;
- break;
- }
-
- /* Prepare fake skb to feed it to icmp_send */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
- return 0;
- dst_release(skb2->dst);
- skb2->dst = NULL;
- skb_pull(skb2, skb->data - (u8*)eiph);
- skb_reset_network_header(skb2);
-
- /* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
- fl.fl4_daddr = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_IPIP;
- if (ip_route_output_key(&init_net, &rt, &key)) {
- kfree_skb(skb2);
- return 0;
- }
- skb2->dev = rt->u.dst.dev;
-
- /* route "incoming" packet */
- if (rt->rt_flags&RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
- fl.fl4_daddr = eiph->daddr;
- fl.fl4_src = eiph->saddr;
- fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(&init_net, &rt, &fl) ||
- rt->u.dst.dev->type != ARPHRD_TUNNEL) {
- ip_rt_put(rt);
- kfree_skb(skb2);
- return 0;
- }
- } else {
- ip_rt_put(rt);
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
- skb2->dst->dev->type != ARPHRD_TUNNEL) {
- kfree_skb(skb2);
- return 0;
- }
- }
-
- /* change mtu on this route */
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (n > dst_mtu(skb2->dst)) {
- kfree_skb(skb2);
- return 0;
- }
- skb2->dst->ops->update_pmtu(skb2->dst, n);
- } else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = netdev_priv(skb2->dev);
- if (t->parms.iph.ttl) {
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- }
- }
-
- icmp_send(skb2, rel_type, rel_code, rel_info);
- kfree_skb(skb2);
- return 0;
-#endif
}
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
const struct iphdr *iph = ip_hdr(skb);
read_lock(&ipip_lock);
- if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
+ iph->saddr, iph->daddr)) != NULL) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
read_unlock(&ipip_lock);
kfree_skb(skb);
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ tunnel->dev->stats.rx_packets++;
+ tunnel->dev->stats.rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
int mtu;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
if (!dst) {
/* NBMA tunnel */
- if ((rt = (struct rtable*)skb->dst) == NULL) {
- tunnel->stat.tx_fifo_errors++;
+ if ((rt = skb->rtable) == NULL) {
+ stats->tx_fifo_errors++;
goto tx_error;
}
if ((dst = rt->rt_gateway) == 0)
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.proto = IPPROTO_IPIP };
- if (ip_route_output_key(&init_net, &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
+ stats->tx_carrier_errors++;
goto tx_error_icmp;
}
}
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
if (mtu < 68) {
- tunnel->stat.collisions++;
+ stats->collisions++;
ip_rt_put(rt);
goto tx_error;
}
.tos = RT_TOS(iph->tos) } },
.proto = IPPROTO_IPIP };
struct rtable *rt;
- if (!ip_route_output_key(&init_net, &rt, &fl)) {
+ if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
t = NULL;
- if (dev == ipip_fb_tunnel_dev) {
+ if (dev == ipn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
- t = ipip_tunnel_locate(&p, 0);
+ t = ipip_tunnel_locate(net, &p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
- t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+ t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
- if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
break;
}
t = netdev_priv(dev);
- ipip_tunnel_unlink(t);
+ ipip_tunnel_unlink(ipn, t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip_tunnel_link(t);
+ ipip_tunnel_link(ipn, t);
netdev_state_change(dev);
}
}
if (!capable(CAP_NET_ADMIN))
goto done;
- if (dev == ipip_fb_tunnel_dev) {
+ if (dev == ipn->fb_tunnel_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
goto done;
err = -ENOENT;
- if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
+ if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t->dev == ipip_fb_tunnel_dev)
+ if (t->dev == ipn->fb_tunnel_dev)
goto done;
dev = t->dev;
}
return err;
}
-static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
{
dev->uninit = ipip_tunnel_uninit;
dev->hard_start_xmit = ipip_tunnel_xmit;
- dev->get_stats = ipip_tunnel_get_stats;
dev->do_ioctl = ipip_tunnel_ioctl;
dev->change_mtu = ipip_tunnel_change_mtu;
dev->destructor = free_netdev;
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
static int ipip_tunnel_init(struct net_device *dev)
return 0;
}
-static int __init ipip_fb_tunnel_init(struct net_device *dev)
+static int ipip_fb_tunnel_init(struct net_device *dev) /* __init dropped: ipip_init_net(), which is not __init, installs this as dev->init */
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
+ struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); /* tables of the device's namespace */
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
iph->ihl = 5;
dev_hold(dev);
- tunnels_wc[0] = tunnel;
+ ipn->tunnels_wc[0] = tunnel; /* fallback tunnel takes the wildcard slot */
return 0;
}
static char banner[] __initdata =
KERN_INFO "IPv4 over IPv4 tunneling driver\n";
-static int __init ipip_init(void)
+static void ipip_destroy_tunnels(struct ipip_net *ipn) /* unregister every hashed tunnel device of one namespace */
+{
+ int prio;
+
+ for (prio = 1; prio < 4; prio++) { /* prio 0 (tunnels_wc) is skipped; ipip_exit_net() unregisters the fallback device itself */
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ while ((t = ipn->tunnels[prio][h]) != NULL) /* re-reads the bucket head each pass; presumably unregistering unlinks t via ipip_tunnel_uninit() - TODO confirm */
+ unregister_netdevice(t->dev);
+ }
+ }
+}
+
+static int ipip_init_net(struct net *net) /* per-namespace setup; returns 0 or a negative errno */
{
int err;
+ struct ipip_net *ipn;
- printk(banner);
+ err = -ENOMEM;
+ ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL); /* zeroed, so every bucket starts out NULL */
+ if (ipn == NULL)
+ goto err_alloc;
- if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
- printk(KERN_INFO "ipip init: can't register tunnel\n");
- return -EAGAIN;
- }
+ err = net_assign_generic(net, ipip_net_id, ipn); /* store ipn so net_generic(net, ipip_net_id) can find it */
+ if (err < 0)
+ goto err_assign;
- ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
+ ipn->tunnels[0] = ipn->tunnels_wc; /* same order as the old static tunnels[] initializer */
+ ipn->tunnels[1] = ipn->tunnels_l;
+ ipn->tunnels[2] = ipn->tunnels_r;
+ ipn->tunnels[3] = ipn->tunnels_r_l;
+
+ ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
"tunl0",
ipip_tunnel_setup);
- if (!ipip_fb_tunnel_dev) {
+ if (!ipn->fb_tunnel_dev) {
err = -ENOMEM;
- goto err1;
+ goto err_alloc_dev;
}
- ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
+ ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
+ dev_net_set(ipn->fb_tunnel_dev, net); /* bind the fallback device to this namespace before registering it */
+
+ if ((err = register_netdev(ipn->fb_tunnel_dev)))
+ goto err_reg_dev;
- if ((err = register_netdev(ipip_fb_tunnel_dev)))
- goto err2;
- out:
+ return 0;
+
+err_reg_dev:
+ free_netdev(ipn->fb_tunnel_dev); /* registration failed: release the allocated device */
+err_alloc_dev:
+ /* nothing: no device was allocated, only ipn is left to free */
+err_assign:
+ kfree(ipn);
+err_alloc:
return err;
- err2:
- free_netdev(ipip_fb_tunnel_dev);
- err1:
- xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
- goto out;
}
-static void __exit ipip_destroy_tunnels(void)
+static void ipip_exit_net(struct net *net) /* per-namespace teardown; undoes ipip_init_net() */
{
- int prio;
+ struct ipip_net *ipn;
- for (prio = 1; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
- while ((t = tunnels[prio][h]) != NULL)
- unregister_netdevice(t->dev);
- }
+ ipn = net_generic(net, ipip_net_id);
+ rtnl_lock(); /* both unregister steps below run with RTNL held */
+ ipip_destroy_tunnels(ipn); /* hashed tunnels first (prio 1..3) */
+ unregister_netdevice(ipn->fb_tunnel_dev); /* then the fallback device */
+ rtnl_unlock();
+ kfree(ipn); /* allocated with kzalloc() in ipip_init_net() */
+}
+
+static struct pernet_operations ipip_net_ops = { /* per-namespace hooks, registered by ipip_init() */
+ .init = ipip_init_net, /* allocates struct ipip_net and registers "tunl0" */
+ .exit = ipip_exit_net, /* unregisters all tunnel devices and frees struct ipip_net */
+};
+
+/*
+ * Module entry point: print the banner, then register the per-namespace
+ * state and the IPIP protocol handler.
+ *
+ * The pernet device is registered BEFORE the xfrm4 tunnel handler. The
+ * receive and ICMP-error paths in this file call ipip_tunnel_lookup(),
+ * which fetches its tables through net_generic(net, ipip_net_id). That
+ * per-namespace pointer only exists once ipip_init_net() has run, so
+ * registering the handler first would leave a window in which a packet
+ * could reach the lookup with no struct ipip_net set up yet.
+ * ipip_fini() tears down in the matching reverse order (handler first).
+ *
+ * Returns 0 on success, the register_pernet_gen_device() error if the
+ * per-namespace setup fails, or -EAGAIN if the handler cannot be
+ * registered.
+ */
+static int __init ipip_init(void)
+{
+ int err;
+
+ printk(banner);
+
+ err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
+ if (err)
+ return err;
+
+ if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
+ printk(KERN_INFO "ipip init: can't register tunnel\n");
+ /* Undo the per-namespace setup so nothing is left behind. */
+ unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
+ err = -EAGAIN;
}
+
+ return err;
}
static void __exit ipip_fini(void)
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
printk(KERN_INFO "ipip close: can't deregister tunnel\n");
- rtnl_lock();
- ipip_destroy_tunnels();
- unregister_netdevice(ipip_fb_tunnel_dev);
- rtnl_unlock();
+ unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
}
module_init(ipip_init);