X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Fudp.c;h=7ea1b67b6de1cccbbc5d4332ef45df04578b8aae;hb=5e226e4d9016daee170699f8a4188a5505021756;hp=3d6021585658161bfff9a9765f84f5eaa64d40ae;hpb=9055e051b8d4b266054fe511a65a9888d30fa64f;p=linux-2.6 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3d60215856..7ea1b67b6d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -118,14 +119,25 @@ EXPORT_SYMBOL(udp_stats_in6); struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static inline int __udp_lib_lport_inuse(__u16 num, +int sysctl_udp_mem[3] __read_mostly; +int sysctl_udp_rmem_min __read_mostly; +int sysctl_udp_wmem_min __read_mostly; + +EXPORT_SYMBOL(sysctl_udp_mem); +EXPORT_SYMBOL(sysctl_udp_rmem_min); +EXPORT_SYMBOL(sysctl_udp_wmem_min); + +atomic_t udp_memory_allocated; +EXPORT_SYMBOL(udp_memory_allocated); + +static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_hash == num) + if (sk->sk_net == net && sk->sk_hash == num) return 1; return 0; } @@ -147,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_head *head; struct sock *sk2; int error = 1; + struct net *net = sk->sk_net; write_lock_bh(&udp_hash_lock); @@ -186,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, /* 2nd pass: find hole in shortest hash chain */ rover = best; for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(rover, udptable)) + if (! __udp_lib_lport_inuse(net, rover, udptable)) goto gotit; rover += UDP_HTABLE_SIZE; if (rover > high) @@ -206,6 +219,7 @@ gotit: sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && + sk2->sk_net == net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -218,7 +232,7 @@ gotit: if (sk_unhashed(sk)) { head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); } error = 0; fail: @@ -249,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -262,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + if (sk->sk_net == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); if (inet->rcv_saddr) { if (inet->rcv_saddr != daddr) @@ -349,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) int harderr; int err; - sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, - skb->dev->ifindex, udptable ); + sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -406,7 +421,7 @@ out: void udp_err(struct sk_buff *skb, u32 info) { - return __udp4_lib_err(skb, info, udp_hash); + __udp4_lib_err(skb, info, udp_hash); } /* @@ -626,7 +641,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, connected = 0; } - if (MULTICAST(daddr)) { + if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) @@ -648,7 +663,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, 1); + err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -901,13 +916,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: + lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (noblock) return -EAGAIN; @@ -1072,7 +1091,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = udp_queue_rcv_skb(sk, skb1); + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1161,11 +1188,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - inet_iif(skb), udptable); + sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1460,6 +1493,10 @@ struct proto udp_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, @@ -1516,6 +1553,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) } static void *udp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(udp_hash_lock) { read_lock(&udp_hash_lock); return *pos ? udp_get_idx(seq, *pos-1) : (void *)1; @@ -1535,6 +1573,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void udp_seq_stop(struct seq_file *seq, void *v) + __releases(udp_hash_lock) { read_unlock(&udp_hash_lock); } @@ -1655,6 +1694,25 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +void __init udp_init(void) +{ + unsigned long limit; + + /* Set the pressure threshold up by the same strategy of TCP. It is a + * fraction of global memory that is up to 1/2 at 256 MB, decreasing + * toward zero with the amount of memory, with a floor of 128 pages. + */ + limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_udp_mem[0] = limit / 4 * 3; + sysctl_udp_mem[1] = limit; + sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; + + sysctl_udp_rmem_min = SK_MEM_QUANTUM; + sysctl_udp_wmem_min = SK_MEM_QUANTUM; +} + EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock);