X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fcore%2Fsock.c;h=09cb3a74de7f35a976f5b21fbc33a67b3a8224b9;hb=5e226e4d9016daee170699f8a4188a5505021756;hp=8fc2f84209e42eb9fa7a7eff87c3b0f096b674b2;hpb=e36aeee65d4db050bd8713537416a0a0632db079;p=linux-2.6

diff --git a/net/core/sock.c b/net/core/sock.c
index 8fc2f84209..09cb3a74de 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
+  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
 };
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-  "slock-27"       , "slock-28"          , "slock-29"          ,
+  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_MAX"
 };
@@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		goto out;
 
+	if (!sk_rmem_schedule(sk, skb->truesize)) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
 	skb->dev = NULL;
 	skb_set_owner_r(skb, sk);
 
@@ -419,6 +424,14 @@ out:
 	return ret;
 }
 
+static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+{
+	if (valbool)
+		sock_set_flag(sk, bit);
+	else
+		sock_reset_flag(sk, bit);
+}
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -463,11 +476,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_DEBUG:
 		if (val && !capable(CAP_NET_ADMIN)) {
 			ret = -EACCES;
-		}
-		else if (valbool)
-			sock_set_flag(sk, SOCK_DBG);
-		else
-			sock_reset_flag(sk, SOCK_DBG);
+		} else
+			sock_valbool_flag(sk, SOCK_DBG, valbool);
 		break;
 	case SO_REUSEADDR:
 		sk->sk_reuse = valbool;
@@ -477,10 +487,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		ret = -ENOPROTOOPT;
 		break;
 	case SO_DONTROUTE:
-		if (valbool)
-			sock_set_flag(sk, SOCK_LOCALROUTE);
-		else
-			sock_reset_flag(sk, SOCK_LOCALROUTE);
+		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
 		break;
 	case SO_BROADCAST:
 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -660,6 +667,13 @@ set_rcvbuf:
 		else
 			clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
+	case SO_MARK:
+		if (!capable(CAP_NET_ADMIN))
+			ret = -EPERM;
+		else {
+			sk->sk_mark = val;
+		}
+		break;
 
 	/* We implement the SO_SNDLOWAT etc to
 	   not be settable (1003.1g 5.3) */
@@ -829,6 +843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_PEERSEC:
 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
 
+	case SO_MARK:
+		v.val = sk->sk_mark;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1105,7 +1123,9 @@ void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
+	skb_truesize_check(skb);
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+	sk_mem_uncharge(skb->sk, skb->truesize);
 }
 
 
@@ -1382,6 +1402,103 @@ int sk_wait_data(struct sock *sk, long *timeo)
 
 EXPORT_SYMBOL(sk_wait_data);
 
+/**
+ *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ *	@sk: socket
+ *	@size: memory size to allocate
+ *	@kind: allocation type
+ *
+ *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ *	rmem allocation. This function assumes that protocols which have
+ *	memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+	struct proto *prot = sk->sk_prot;
+	int amt = sk_mem_pages(size);
+	int allocated;
+
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	allocated = atomic_add_return(amt, prot->memory_allocated);
+
+	/* Under limit. */
+	if (allocated <= prot->sysctl_mem[0]) {
+		if (prot->memory_pressure && *prot->memory_pressure)
+			*prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Under pressure. */
+	if (allocated > prot->sysctl_mem[1])
+		if (prot->enter_memory_pressure)
+			prot->enter_memory_pressure();
+
+	/* Over hard limit. */
+	if (allocated > prot->sysctl_mem[2])
+		goto suppress_allocation;
+
+	/* guarantee minimum buffer size under pressure */
+	if (kind == SK_MEM_RECV) {
+		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+			return 1;
+	} else { /* SK_MEM_SEND */
+		if (sk->sk_type == SOCK_STREAM) {
+			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+				return 1;
+		} else if (atomic_read(&sk->sk_wmem_alloc) <
+			   prot->sysctl_wmem[0])
+				return 1;
+	}
+
+	if (prot->memory_pressure) {
+		if (!*prot->memory_pressure ||
+		    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+		    sk_mem_pages(sk->sk_wmem_queued +
+				 atomic_read(&sk->sk_rmem_alloc) +
+				 sk->sk_forward_alloc))
+			return 1;
+	}
+
+suppress_allocation:
+
+	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
+	atomic_sub(amt, prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_schedule);
+
+/**
+ *	__sk_mem_reclaim - reclaim memory_allocated
+ *	@sk: socket
+ */
+void __sk_mem_reclaim(struct sock *sk)
+{
+	struct proto *prot = sk->sk_prot;
+
+	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+		   prot->memory_allocated);
+	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+
+	if (prot->memory_pressure && *prot->memory_pressure &&
+	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+		*prot->memory_pressure = 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_reclaim);
+
+
 /*
  * Set of default routines for initialising struct proto_ops when
  * the protocol does not support a particular function. In certain
@@ -1496,7 +1613,7 @@ static void sock_def_error_report(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
-	sk_wake_async(sk,0,POLL_ERR);
+	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1505,7 +1622,7 @@ static void sock_def_readable(struct sock *sk, int len)
 	read_lock(&sk->sk_callback_lock);
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
-	sk_wake_async(sk,1,POLL_IN);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1522,7 +1639,7 @@ static void sock_def_write_space(struct sock *sk)
 
 		/* Should agree with poll, otherwise some programs break */
 		if (sock_writeable(sk))
-			sk_wake_async(sk, 2, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
 	read_unlock(&sk->sk_callback_lock);
@@ -1537,7 +1654,7 @@ void sk_send_sigurg(struct sock *sk)
 {
 	if (sk->sk_socket && sk->sk_socket->file)
 		if (send_sigurg(&sk->sk_socket->file->f_owner))
-			sk_wake_async(sk, 3, POLL_PRI);
+			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
 }
 
 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
@@ -1611,9 +1728,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_stamp = ktime_set(-1L, -1L);
 
 	atomic_set(&sk->sk_refcnt, 1);
+	atomic_set(&sk->sk_drops, 0);
 }
 
-void fastcall lock_sock_nested(struct sock *sk, int subclass)
+void lock_sock_nested(struct sock *sk, int subclass)
 {
 	might_sleep();
 	spin_lock_bh(&sk->sk_lock.slock);
@@ -1630,7 +1748,7 @@ void lock_sock_nested(struct sock *sk, int subclass)
 
 EXPORT_SYMBOL(lock_sock_nested);
 
-void fastcall release_sock(struct sock *sk)
+void release_sock(struct sock *sk)
 {
 	/*
 	 * The sk_lock has mutex_unlock() semantics:
@@ -1801,65 +1919,15 @@ EXPORT_SYMBOL(sk_common_release);
 static DEFINE_RWLOCK(proto_list_lock);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_SMP
-/*
- * Define default functions to keep track of inuse sockets per protocol
- * Note that often used protocols use dedicated functions to get a speed increase.
- * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
- */
-static void inuse_add(struct proto *prot, int inc)
-{
-	per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
-}
-
-static int inuse_get(const struct proto *prot)
-{
-	int res = 0, cpu;
-	for_each_possible_cpu(cpu)
-		res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
-	return res;
-}
-
-static int inuse_init(struct proto *prot)
-{
-	if (!prot->inuse_getval || !prot->inuse_add) {
-		prot->inuse_ptr = alloc_percpu(int);
-		if (prot->inuse_ptr == NULL)
-			return -ENOBUFS;
-
-		prot->inuse_getval = inuse_get;
-		prot->inuse_add = inuse_add;
-	}
-	return 0;
-}
-
-static void inuse_fini(struct proto *prot)
-{
-	if (prot->inuse_ptr != NULL) {
-		free_percpu(prot->inuse_ptr);
-		prot->inuse_ptr = NULL;
-		prot->inuse_getval = NULL;
-		prot->inuse_add = NULL;
-	}
-}
-#else
-static inline int inuse_init(struct proto *prot)
-{
-	return 0;
-}
-
-static inline void inuse_fini(struct proto *prot)
-{
-}
-#endif
-
 int proto_register(struct proto *prot, int alloc_slab)
 {
 	char *request_sock_slab_name = NULL;
 	char *timewait_sock_slab_name;
 
-	if (inuse_init(prot))
+	if (sock_prot_inuse_init(prot) != 0) {
+		printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
 		goto out;
+	}
 
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
@@ -1927,7 +1995,7 @@ out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
 out_free_inuse:
-	inuse_fini(prot);
+	sock_prot_inuse_free(prot);
 out:
 	return -ENOBUFS;
 }
@@ -1940,7 +2008,8 @@ void proto_unregister(struct proto *prot)
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 
-	inuse_fini(prot);
+	sock_prot_inuse_free(prot);
+
 	if (prot->slab != NULL) {
 		kmem_cache_destroy(prot->slab);
 		prot->slab = NULL;
@@ -1967,6 +2036,7 @@ EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(proto_list_lock)
 {
 	read_lock(&proto_list_lock);
 	return seq_list_start_head(&proto_list, *pos);
@@ -1978,6 +2048,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void proto_seq_stop(struct seq_file *seq, void *v)
+	__releases(proto_list_lock)
 {
 	read_unlock(&proto_list_lock);
 }
@@ -2097,7 +2168,3 @@ EXPORT_SYMBOL(sock_wmalloc);
 EXPORT_SYMBOL(sock_i_uid);
 EXPORT_SYMBOL(sock_i_ino);
 EXPORT_SYMBOL(sysctl_optmem_max);
-#ifdef CONFIG_SYSCTL
-EXPORT_SYMBOL(sysctl_rmem_max);
-EXPORT_SYMBOL(sysctl_wmem_max);
-#endif
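For context, the SO_MARK option added in the sock_setsockopt()/sock_getsockopt() hunks above can be exercised from userspace roughly as follows. This is a minimal sketch, assuming a libc that exposes SO_MARK (the kernel defines it as 36 on most architectures); setting the mark requires CAP_NET_ADMIN, mirroring the capable() check in the patch, while reading it back is unprivileged.

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_MARK
#define SO_MARK 36	/* value from the kernel's per-arch socket.h */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	unsigned int mark = 0x2a;
	socklen_t len = sizeof(mark);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Fails with EPERM without CAP_NET_ADMIN, as in the patch. */
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
		perror("setsockopt(SO_MARK)");
	else if (getsockopt(fd, SOL_SOCKET, SO_MARK, &mark, &len) == 0)
		printf("sk_mark = %#x\n", mark);	/* prints 0x2a */
	return 0;
}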
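The __sk_mem_schedule()/__sk_mem_reclaim() pair above accounts socket memory in whole quanta (SK_MEM_QUANTUM, i.e. PAGE_SIZE), with sk_forward_alloc carrying the sub-quantum remainder between calls. The following is a self-contained model of just that arithmetic, using an illustrative 4 KiB quantum rather than values from any particular configuration:

#include <stdio.h>

#define SK_MEM_QUANTUM		4096	/* assumed PAGE_SIZE */
#define SK_MEM_QUANTUM_SHIFT	12

/* round a byte count up to whole quanta, as sk_mem_pages() does */
static int sk_mem_pages(int amt)
{
	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
}

int main(void)
{
	int forward_alloc = 0;		/* models sk->sk_forward_alloc */
	int memory_allocated = 0;	/* models prot->memory_allocated */
	int size = 1500;		/* e.g. one Ethernet-sized skb */

	/* schedule: charge whole quanta, keep the slack per-socket */
	int amt = sk_mem_pages(size);
	forward_alloc += amt * SK_MEM_QUANTUM;
	memory_allocated += amt;

	/* the skb then consumes 'size' bytes of the local reserve */
	forward_alloc -= size;

	/* reclaim: return whole quanta, keep the sub-quantum remainder */
	memory_allocated -= forward_alloc >> SK_MEM_QUANTUM_SHIFT;
	forward_alloc &= SK_MEM_QUANTUM - 1;

	printf("forward_alloc=%d memory_allocated=%d\n",
	       forward_alloc, memory_allocated);	/* 2596 and 1 */
	return 0;
}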
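The sock_valbool_flag() helper introduced in the first hunk folds the repeated "if (valbool) set the flag, else clear it" pattern out of sock_setsockopt() (SO_DEBUG, SO_DONTROUTE, SO_BROADCAST, ...). The same idiom over a plain bitmask, as a standalone sketch in which the enum merely stands in for the kernel's enum sock_flags:

#include <stdio.h>

enum { SOCK_DBG, SOCK_LOCALROUTE, SOCK_BROADCAST };

static void valbool_flag(unsigned long *flags, int bit, int valbool)
{
	if (valbool)
		*flags |= 1UL << bit;		/* sock_set_flag() */
	else
		*flags &= ~(1UL << bit);	/* sock_reset_flag() */
}

int main(void)
{
	unsigned long flags = 0;

	valbool_flag(&flags, SOCK_BROADCAST, 1);
	valbool_flag(&flags, SOCK_DBG, 0);
	printf("flags = %#lx\n", flags);	/* prints 0x4 */
	return 0;
}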
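Lastly, the __acquires()/__releases() annotations added to proto_seq_start() and proto_seq_stop() are hints for the sparse static checker and compile away in a normal build. A hedged sketch of how include/linux/compiler.h defines them (the exact form may differ by kernel version):

#ifdef __CHECKER__
# define __acquires(x)	__attribute__((context(x, 0, 1)))
# define __releases(x)	__attribute__((context(x, 1, 0)))
#else
# define __acquires(x)
# define __releases(x)
#endif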