X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Ficmp.c;h=ff9a8e643fccbe1ee08e76f9190b6409bd07cc59;hb=7524d7d6de5d5d3f081de8cf5479819fad339661;hp=272c69e106e9a6a416249fcb77607ac43c16940a;hpb=541010e4b8921cd781ff02ae68028501457045b6;p=linux-2.6 diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 272c69e106..ff9a8e643f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -228,14 +229,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; -#define icmp_socket __get_cpu_var(__icmp_socket) +static struct sock *icmp_sk(struct net *net) +{ + return net->ipv4.icmp_sk[smp_processor_id()]; +} -static __inline__ int icmp_xmit_lock(void) +static inline int icmp_xmit_lock(struct sock *sk) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ @@ -245,9 +248,9 @@ static __inline__ int icmp_xmit_lock(void) return 0; } -static void icmp_xmit_unlock(void) +static inline void icmp_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); } /* @@ -274,18 +277,19 @@ static void icmp_xmit_unlock(void) #define XRLIM_BURST_FACTOR 6 int xrlim_allow(struct dst_entry *dst, int timeout) { - unsigned long now; + unsigned long now, token = dst->rate_tokens; int rc = 0; now = jiffies; - dst->rate_tokens += now - dst->rate_last; + token += now - dst->rate_last; dst->rate_last = now; - if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) - dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; - if (dst->rate_tokens >= timeout) { - dst->rate_tokens -= timeout; + if (token > XRLIM_BURST_FACTOR * timeout) + token = XRLIM_BURST_FACTOR * timeout; + if (token >= timeout) { + token -= timeout; rc = 1; } + dst->rate_tokens = token; return rc; } @@ -344,19 +348,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt) { + struct sock *sk; struct sk_buff *skb; - if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + sk = icmp_sk(rt->u.dst.dev->nd_net); + if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) - ip_flush_pending_frames(icmp_socket->sk); - else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + ip_flush_pending_frames(sk); + else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); __wsum csum = 0; struct sk_buff *skb1; - skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { + skb_queue_walk(&sk->sk_write_queue, skb1) { csum = csum_add(csum, skb1->csum); } csum = csum_partial_copy_nocheck((void *)&icmp_param->data, @@ -364,7 +370,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, icmp_param->head_len, csum); icmph->checksum = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; - ip_push_pending_frames(icmp_socket->sk); + ip_push_pending_frames(sk); } } @@ -374,16 +380,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct sock *sk = icmp_socket->sk; - struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; + struct net *net = rt->u.dst.dev->nd_net; + struct sock *sk = icmp_sk(net); + struct inet_sock *inet = inet_sk(sk); __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock()) + if (icmp_xmit_lock(sk)) return; icmp_param->data.icmph.checksum = 0; @@ -403,7 +410,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .tos = RT_TOS(ip_hdr(skb)->tos) } }, .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(&rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) goto out_unlock; } if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, @@ -411,7 +418,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_push_reply(icmp_param, &ipc, rt); ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(sk); } @@ -431,13 +438,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct iphdr *iph; int room; struct icmp_bxm icmp_param; - struct rtable *rt = (struct rtable *)skb_in->dst; + struct rtable *rt = skb_in->rtable; struct ipcm_cookie ipc; __be32 saddr; u8 tos; + struct net *net; + struct sock *sk; if (!rt) goto out; + net = rt->u.dst.dev->nd_net; + sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -501,7 +512,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock()) + if (icmp_xmit_lock(sk)) return; /* @@ -513,7 +524,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct net_device *dev = NULL; if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(&init_net, rt->fl.iif); + dev = dev_get_by_index(net, rt->fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -540,8 +551,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.data.icmph.checksum = 0; icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); - icmp_out_count(icmp_param.data.icmph.type); - inet_sk(icmp_socket->sk)->tos = tos; + inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; @@ -564,11 +574,71 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } }; + int err; + struct rtable *rt2; + security_skb_classify_flow(skb_in, &fl); - if (ip_route_output_key(&rt, &fl)) + if (__ip_route_output_key(net, &rt, &fl)) + goto out_unlock; + + /* No need to clone since we're just using its address. */ + rt2 = rt; + + err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + switch (err) { + case 0: + if (rt != rt2) + goto route_done; + break; + case -EPERM: + rt = NULL; + break; + default: + goto out_unlock; + } + + if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) + goto out_unlock; + + if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) + err = __ip_route_output_key(net, &rt2, &fl); + else { + struct flowi fl2 = {}; + struct dst_entry *odst; + + fl2.fl4_dst = fl.fl4_src; + if (ip_route_output_key(net, &rt2, &fl2)) + goto out_unlock; + + /* Ugh! */ + odst = skb_in->dst; + err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, + RT_TOS(tos), rt2->u.dst.dev); + + dst_release(&rt2->u.dst); + rt2 = skb_in->rtable; + skb_in->dst = odst; + } + + if (err) + goto out_unlock; + + err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + XFRM_LOOKUP_ICMP); + if (err == -ENOENT) { + if (!rt) + goto out_unlock; + goto route_done; + } + + dst_release(&rt->u.dst); + rt = rt2; + + if (err) goto out_unlock; } +route_done: if (!icmpv4_xrlim_allow(rt, type, code)) goto ende; @@ -589,7 +659,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ende: ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(sk); out:; } @@ -604,8 +674,10 @@ static void icmp_unreach(struct sk_buff *skb) struct icmphdr *icmph; int hash, protocol; struct net_protocol *ipprot; - struct sock *raw_sk; u32 info = 0; + struct net *net; + + net = skb->dst->dev->nd_net; /* * Incomplete header ? @@ -636,7 +708,7 @@ static void icmp_unreach(struct sk_buff *skb) "and DF set.\n", NIPQUAD(iph->daddr)); } else { - info = ip_rt_frag_needed(iph, + info = ip_rt_frag_needed(net, iph, ntohs(icmph->un.frag.mtu)); if (!info) goto out; @@ -674,7 +746,7 @@ static void icmp_unreach(struct sk_buff *skb) */ if (!sysctl_icmp_ignore_bogus_error_responses && - inet_addr_type(iph->daddr) == RTN_BROADCAST) { + inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " "type %u, code %u " @@ -698,21 +770,9 @@ static void icmp_unreach(struct sk_buff *skb) /* * Deliver ICMP message to raw sockets. Pretty useless feature? */ + raw_icmp_error(skb, protocol, info); - /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = protocol & (MAX_INET_PROTOS - 1); - read_lock(&raw_v4_lock); - if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { - while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, - iph->saddr, - skb->dev->ifindex)) != NULL) { - raw_err(raw_sk, skb, info); - raw_sk = sk_next(raw_sk); - iph = (struct iphdr *)skb->data; - } - } - read_unlock(&raw_v4_lock); - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[hash]); if (ipprot && ipprot->err_handler) @@ -883,7 +943,7 @@ static void icmp_address(struct sk_buff *skb) static void icmp_address_reply(struct sk_buff *skb) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; @@ -928,7 +988,26 @@ static void icmp_discard(struct sk_buff *skb) int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + int nh; + + if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + XFRM_STATE_ICMP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr))) + goto drop; + + nh = skb_network_offset(skb); + skb_set_network_header(skb, sizeof(*icmph)); + + if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + skb_set_network_header(skb, nh); + } ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); @@ -943,7 +1022,7 @@ int icmp_rcv(struct sk_buff *skb) goto error; } - if (!pskb_pull(skb, sizeof(struct icmphdr))) + if (!pskb_pull(skb, sizeof(*icmph))) goto error; icmph = icmp_hdr(skb); @@ -1067,29 +1146,46 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { }, }; -void __init icmp_init(struct net_proto_family *ops) +static void __net_exit icmp_sk_exit(struct net *net) { - struct inet_sock *inet; int i; - for_each_possible_cpu(i) { - int err; + for_each_possible_cpu(i) + sk_release_kernel(net->ipv4.icmp_sk[i]); + kfree(net->ipv4.icmp_sk); + net->ipv4.icmp_sk = NULL; +} + +int __net_init icmp_sk_init(struct net *net) +{ + int i, err; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, - &per_cpu(__icmp_socket, i)); + net->ipv4.icmp_sk = + kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); + if (net->ipv4.icmp_sk == NULL) + return -ENOMEM; + for_each_possible_cpu(i) { + struct sock *sk; + struct socket *sock; + struct inet_sock *inet; + + err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock); if (err < 0) - panic("Failed to create the ICMP control socket.\n"); + goto fail; + + net->ipv4.icmp_sk[i] = sk = sock->sk; + sk_change_net(sk, net); - per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; + sk->sk_allocation = GFP_ATOMIC; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - per_cpu(__icmp_socket, i)->sk->sk_sndbuf = + sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); - inet = inet_sk(per_cpu(__icmp_socket, i)->sk); + inet = inet_sk(sk); inet->uc_ttl = -1; inet->pmtudisc = IP_PMTUDISC_DONT; @@ -1097,12 +1193,28 @@ void __init icmp_init(struct net_proto_family *ops) * see it, we do not wish this socket to see incoming * packets. */ - per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); + sk->sk_prot->unhash(sk); } + return 0; + +fail: + for_each_possible_cpu(i) + sk_release_kernel(net->ipv4.icmp_sk[i]); + kfree(net->ipv4.icmp_sk); + return err; +} + +static struct pernet_operations __net_initdata icmp_sk_ops = { + .init = icmp_sk_init, + .exit = icmp_sk_exit, +}; + +int __init icmp_init(void) +{ + return register_pernet_device(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); EXPORT_SYMBOL(icmp_statistics); -EXPORT_SYMBOL(icmpmsg_statistics); EXPORT_SYMBOL(xrlim_allow);