X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fcore%2Fsock.c;h=91f8bbc93526523a2eaca207359951117712c577;hb=ad619800e4e034cad44299b2a22df9eebb043ac3;hp=7a0567b4b2c950702251ed7ccb3e473bead50723;hpb=533bb8a4d7388686243c37a414c4448ba3566f8a;p=linux-2.6 diff --git a/net/core/sock.c b/net/core/sock.c index 7a0567b4b2..91f8bbc935 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,8 +7,6 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, * Florian La Roche, @@ -182,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-29" , + "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_MAX" }; @@ -228,11 +226,12 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) static int warned __read_mostly; *timeo_p = 0; - if (warned < 10 && net_ratelimit()) + if (warned < 10 && net_ratelimit()) { warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", current->comm, task_pid_nr(current)); + } return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -269,7 +268,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int err = 0; int skb_len; - /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces + /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK */ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= @@ -372,7 +371,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); char devname[IFNAMSIZ]; int index; @@ -450,15 +449,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * Options without arguments */ -#ifdef SO_DONTLINGER /* Compatibility item... */ - if (optname == SO_DONTLINGER) { - lock_sock(sk); - sock_reset_flag(sk, SOCK_LINGER); - release_sock(sk); - return 0; - } -#endif - if (optname == SO_BINDTODEVICE) return sock_bindtodevice(sk, optval, optlen); @@ -942,7 +932,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance - * @zero_it: if we should zero the newly allocated sock */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) @@ -958,7 +947,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sk->sk_net = get_net(net); + sock_net_set(sk, get_net(net)); } return sk; @@ -981,12 +970,32 @@ void sk_free(struct sock *sk) if (atomic_read(&sk->sk_omem_alloc)) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", - __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); + __func__, atomic_read(&sk->sk_omem_alloc)); - put_net(sk->sk_net); + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } +/* + * Last sock_put should drop referrence to sk->sk_net. It has already + * been dropped in sk_change_net. Taking referrence to stopping namespace + * is not an option. + * Take referrence to a socket to remove it from hash _alive_ and after that + * destroy it in the context of init_net. + */ +void sk_release_kernel(struct sock *sk) +{ + if (sk == NULL || sk->sk_socket == NULL) + return; + + sock_hold(sk); + sock_release(sk->sk_socket); + release_net(sock_net(sk)); + sock_net_set(sk, get_net(&init_net)); + sock_put(sk); +} +EXPORT_SYMBOL(sk_release_kernel); + struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk; @@ -998,7 +1007,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(newsk->sk_net); + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1057,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) * to be taken into account in all callers. -acme */ sk_refcnt_debug_inc(newsk); - newsk->sk_socket = NULL; + sk_set_socket(newsk, NULL); newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) @@ -1076,10 +1085,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (sk_can_gso(sk)) { - if (dst->header_len) + if (dst->header_len) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - else + } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = dst->dev->gso_max_size; + } } } EXPORT_SYMBOL_GPL(sk_setup_caps); @@ -1431,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Under pressure. */ if (allocated > prot->sysctl_mem[1]) if (prot->enter_memory_pressure) - prot->enter_memory_pressure(); + prot->enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > prot->sysctl_mem[2]) @@ -1691,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; sk->sk_state = TCP_CLOSE; - sk->sk_socket = sock; + sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); @@ -1919,16 +1930,113 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); +#ifdef CONFIG_PROC_FS +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int val[PROTO_INUSE_NR]; +}; + +static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); + +#ifdef CONFIG_NET_NS +void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) +{ + int cpu = smp_processor_id(); + per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +int sock_prot_inuse_get(struct net *net, struct proto *prot) +{ + int cpu, idx = prot->inuse_idx; + int res = 0; + + for_each_possible_cpu(cpu) + res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; + + return res >= 0 ? res : 0; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_get); + +static int sock_inuse_init_net(struct net *net) +{ + net->core.inuse = alloc_percpu(struct prot_inuse); + return net->core.inuse ? 0 : -ENOMEM; +} + +static void sock_inuse_exit_net(struct net *net) +{ + free_percpu(net->core.inuse); +} + +static struct pernet_operations net_inuse_ops = { + .init = sock_inuse_init_net, + .exit = sock_inuse_exit_net, +}; + +static __init int net_inuse_init(void) +{ + if (register_pernet_subsys(&net_inuse_ops)) + panic("Cannot initialize net inuse counters"); + + return 0; +} + +core_initcall(net_inuse_init); +#else +static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); + +void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) +{ + __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +int sock_prot_inuse_get(struct net *net, struct proto *prot) +{ + int cpu, idx = prot->inuse_idx; + int res = 0; + + for_each_possible_cpu(cpu) + res += per_cpu(prot_inuse, cpu).val[idx]; + + return res >= 0 ? res : 0; +} +EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +#endif + +static void assign_proto_idx(struct proto *prot) +{ + prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); + + if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { + printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); + return; + } + + set_bit(prot->inuse_idx, proto_inuse_idx); +} + +static void release_proto_idx(struct proto *prot) +{ + if (prot->inuse_idx != PROTO_INUSE_NR - 1) + clear_bit(prot->inuse_idx, proto_inuse_idx); +} +#else +static inline void assign_proto_idx(struct proto *prot) +{ +} + +static inline void release_proto_idx(struct proto *prot) +{ +} +#endif + int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; - if (sock_prot_inuse_init(prot) != 0) { - printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name); - goto out; - } - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1936,7 +2044,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", prot->name); - goto out_free_inuse; + goto out; } if (prot->rsk_prot != NULL) { @@ -1979,6 +2087,7 @@ int proto_register(struct proto *prot, int alloc_slab) write_lock(&proto_list_lock); list_add(&prot->node, &proto_list); + assign_proto_idx(prot); write_unlock(&proto_list_lock); return 0; @@ -1994,8 +2103,6 @@ out_free_request_sock_slab_name: out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; -out_free_inuse: - sock_prot_inuse_free(prot); out: return -ENOBUFS; } @@ -2005,11 +2112,10 @@ EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { write_lock(&proto_list_lock); + release_proto_idx(prot); list_del(&prot->node); write_unlock(&proto_list_lock); - sock_prot_inuse_free(prot); - if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL;