X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Fipip.c;h=af5cb53da5cccdd68350c9a83a66f3fdb81f71d3;hb=252815b0cfe711001eff0327872209986b36d490;hp=dbaed69de06a8f1a52b4034c8531118bd9824d74;hpb=c523aef0f7284970463ebd77a54bca9069711d4a;p=linux-2.6 diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index dbaed69de0..af5cb53da5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -115,49 +115,57 @@ #include #include #include +#include +#include #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static int ipip_net_id; +struct ipip_net { + struct ip_tunnel *tunnels_r_l[HASH_SIZE]; + struct ip_tunnel *tunnels_r[HASH_SIZE]; + struct ip_tunnel *tunnels_l[HASH_SIZE]; + struct ip_tunnel *tunnels_wc[1]; + struct ip_tunnel **tunnels[4]; + + struct net_device *fb_tunnel_dev; +}; + static int ipip_fb_tunnel_init(struct net_device *dev); static int ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); -static struct net_device *ipip_fb_tunnel_dev; - -static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; -static struct ip_tunnel *tunnels_r[HASH_SIZE]; -static struct ip_tunnel *tunnels_l[HASH_SIZE]; -static struct ip_tunnel *tunnels_wc[1]; -static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; - static DEFINE_RWLOCK(ipip_lock); -static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local) +static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, + __be32 remote, __be32 local) { unsigned h0 = HASH(remote); unsigned h1 = HASH(local); struct ip_tunnel *t; + struct ipip_net *ipn = net_generic(net, ipip_net_id); - for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; } - for (t = tunnels_r[h0]; t; t = t->next) { + for (t = ipn->tunnels_r[h0]; t; t = t->next) { if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; } - for (t = tunnels_l[h1]; t; t = t->next) { + for (t = ipn->tunnels_l[h1]; t; t = t->next) { if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) return t; } - if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) return t; return NULL; } -static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) +static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, + struct ip_tunnel_parm *parms) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; @@ -172,19 +180,20 @@ static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) prio |= 1; h ^= HASH(local); } - return &tunnels[prio][h]; + return &ipn->tunnels[prio][h]; } -static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) +static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, + struct ip_tunnel *t) { - return __ipip_bucket(&t->parms); + return __ipip_bucket(ipn, &t->parms); } -static void ipip_tunnel_unlink(struct ip_tunnel *t) +static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel **tp; - for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { + for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { if (t == *tp) { write_lock_bh(&ipip_lock); *tp = t->next; @@ -194,9 +203,9 @@ static void ipip_tunnel_unlink(struct ip_tunnel *t) } } -static void ipip_tunnel_link(struct ip_tunnel *t) +static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { - struct ip_tunnel **tp = ipip_bucket(t); + struct ip_tunnel **tp = ipip_bucket(ipn, t); t->next = *tp; write_lock_bh(&ipip_lock); @@ -204,15 +213,17 @@ static void ipip_tunnel_link(struct ip_tunnel *t) write_unlock_bh(&ipip_lock); } -static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) +static struct ip_tunnel * ipip_tunnel_locate(struct net *net, + struct ip_tunnel_parm *parms, int create) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; struct ip_tunnel *t, **tp, *nt; struct net_device *dev; char name[IFNAMSIZ]; + struct ipip_net *ipn = net_generic(net, ipip_net_id); - for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) { + for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) return t; } @@ -228,6 +239,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c if (dev == NULL) return NULL; + dev_net_set(dev, net); + if (strchr(name, '%')) { if (dev_alloc_name(dev, name) < 0) goto failed_free; @@ -241,7 +254,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c goto failed_free; dev_hold(dev); - ipip_tunnel_link(nt); + ipip_tunnel_link(ipn, nt); return nt; failed_free: @@ -251,20 +264,22 @@ failed_free: static void ipip_tunnel_uninit(struct net_device *dev) { - if (dev == ipip_fb_tunnel_dev) { + struct net *net = dev_net(dev); + struct ipip_net *ipn = net_generic(net, ipip_net_id); + + if (dev == ipn->fb_tunnel_dev) { write_lock_bh(&ipip_lock); - tunnels_wc[0] = NULL; + ipn->tunnels_wc[0] = NULL; write_unlock_bh(&ipip_lock); } else - ipip_tunnel_unlink(netdev_priv(dev)); + ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); } static int ipip_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -305,7 +320,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) err = -ENOENT; read_lock(&ipip_lock); - t = ipip_tunnel_lookup(iph->daddr, iph->saddr); + t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL || t->parms.iph.daddr == 0) goto out; @@ -321,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct iphdr *eiph; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (len < hlen + sizeof(struct iphdr)) - return 0; - eiph = (struct iphdr*)(dp + hlen); - - switch (type) { - default: - return 0; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < hlen) - return 0; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < hlen+68) - return 0; - n -= hlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return 0; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_daddr = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(&init_net, &rt, &key)) { - kfree_skb(skb2); - return 0; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_daddr = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(&init_net, &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); - kfree_skb(skb2); - return 0; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) { - kfree_skb(skb2); - return 0; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return 0; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); - return 0; -#endif } static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, @@ -465,7 +353,8 @@ static int ipip_rcv(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); read_lock(&ipip_lock); - if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { + if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), + iph->saddr, iph->daddr)) != NULL) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { read_unlock(&ipip_lock); kfree_skb(skb); @@ -528,7 +417,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ - if ((rt = (struct rtable*)skb->dst) == NULL) { + if ((rt = skb->rtable) == NULL) { tunnel->stat.tx_fifo_errors++; goto tx_error; } @@ -543,7 +432,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, .proto = IPPROTO_IPIP }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(dev_net(dev), &rt, &fl)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } @@ -664,7 +553,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .tos = RT_TOS(iph->tos) } }, .proto = IPPROTO_IPIP }; struct rtable *rt; - if (!ip_route_output_key(&init_net, &rt, &fl)) { + if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { tdev = rt->u.dst.dev; ip_rt_put(rt); } @@ -672,7 +561,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -687,16 +576,18 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip_tunnel_parm p; struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ipip_net *ipn = net_generic(net, ipip_net_id); switch (cmd) { case SIOCGETTUNNEL: t = NULL; - if (dev == ipip_fb_tunnel_dev) { + if (dev == ipn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } - t = ipip_tunnel_locate(&p, 0); + t = ipip_tunnel_locate(net, &p, 0); } if (t == NULL) t = netdev_priv(dev); @@ -722,9 +613,9 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); - t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { err = -EEXIST; @@ -737,12 +628,12 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; } t = netdev_priv(dev); - ipip_tunnel_unlink(t); + ipip_tunnel_unlink(ipn, t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; memcpy(dev->dev_addr, &p.iph.saddr, 4); memcpy(dev->broadcast, &p.iph.daddr, 4); - ipip_tunnel_link(t); + ipip_tunnel_link(ipn, t); netdev_state_change(dev); } } @@ -770,15 +661,15 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (!capable(CAP_NET_ADMIN)) goto done; - if (dev == ipip_fb_tunnel_dev) { + if (dev == ipn->fb_tunnel_dev) { err = -EFAULT; if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) goto done; err = -ENOENT; - if ((t = ipip_tunnel_locate(&p, 0)) == NULL) + if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) goto done; err = -EPERM; - if (t->dev == ipip_fb_tunnel_dev) + if (t->dev == ipn->fb_tunnel_dev) goto done; dev = t->dev; } @@ -822,6 +713,7 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; + dev->features |= NETIF_F_NETNS_LOCAL; } static int ipip_tunnel_init(struct net_device *dev) @@ -841,10 +733,11 @@ static int ipip_tunnel_init(struct net_device *dev) return 0; } -static int __init ipip_fb_tunnel_init(struct net_device *dev) +static int ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; + struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -854,7 +747,7 @@ static int __init ipip_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; dev_hold(dev); - tunnels_wc[0] = tunnel; + ipn->tunnels_wc[0] = tunnel; return 0; } @@ -867,50 +760,98 @@ static struct xfrm_tunnel ipip_handler = { static char banner[] __initdata = KERN_INFO "IPv4 over IPv4 tunneling driver\n"; -static int __init ipip_init(void) +static void ipip_destroy_tunnels(struct ipip_net *ipn) +{ + int prio; + + for (prio = 1; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = ipn->tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + +static int ipip_init_net(struct net *net) { int err; + struct ipip_net *ipn; - printk(banner); + err = -ENOMEM; + ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL); + if (ipn == NULL) + goto err_alloc; - if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { - printk(KERN_INFO "ipip init: can't register tunnel\n"); - return -EAGAIN; - } + err = net_assign_generic(net, ipip_net_id, ipn); + if (err < 0) + goto err_assign; + + ipn->tunnels[0] = ipn->tunnels_wc; + ipn->tunnels[1] = ipn->tunnels_l; + ipn->tunnels[2] = ipn->tunnels_r; + ipn->tunnels[3] = ipn->tunnels_r_l; - ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), + ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "tunl0", ipip_tunnel_setup); - if (!ipip_fb_tunnel_dev) { + if (!ipn->fb_tunnel_dev) { err = -ENOMEM; - goto err1; + goto err_alloc_dev; } - ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init; + ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; + dev_net_set(ipn->fb_tunnel_dev, net); + + if ((err = register_netdev(ipn->fb_tunnel_dev))) + goto err_reg_dev; + + return 0; - if ((err = register_netdev(ipip_fb_tunnel_dev))) - goto err2; - out: +err_reg_dev: + free_netdev(ipn->fb_tunnel_dev); +err_alloc_dev: + /* nothing */ +err_assign: + kfree(ipn); +err_alloc: return err; - err2: - free_netdev(ipip_fb_tunnel_dev); - err1: - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); - goto out; } -static void __exit ipip_destroy_tunnels(void) +static void ipip_exit_net(struct net *net) { - int prio; + struct ipip_net *ipn; - for (prio = 1; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); - } + ipn = net_generic(net, ipip_net_id); + rtnl_lock(); + ipip_destroy_tunnels(ipn); + unregister_netdevice(ipn->fb_tunnel_dev); + rtnl_unlock(); + kfree(ipn); +} + +static struct pernet_operations ipip_net_ops = { + .init = ipip_init_net, + .exit = ipip_exit_net, +}; + +static int __init ipip_init(void) +{ + int err; + + printk(banner); + + if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { + printk(KERN_INFO "ipip init: can't register tunnel\n"); + return -EAGAIN; } + + err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops); + if (err) + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); + + return err; } static void __exit ipip_fini(void) @@ -918,10 +859,7 @@ static void __exit ipip_fini(void) if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) printk(KERN_INFO "ipip close: can't deregister tunnel\n"); - rtnl_lock(); - ipip_destroy_tunnels(); - unregister_netdevice(ipip_fb_tunnel_dev); - rtnl_unlock(); + unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops); } module_init(ipip_init);