]> err.no Git - linux-2.6/commitdiff
[NET]: Define infrastructure to keep 'inuse' changes in an efficient SMP/NUMA way.
author Eric Dumazet <dada1@cosmosbay.com>
Tue, 6 Nov 2007 07:38:39 +0000 (23:38 -0800)
committer David S. Miller <davem@sunset.davemloft.net>
Wed, 7 Nov 2007 12:08:57 +0000 (04:08 -0800)
"struct proto" currently uses an array stats[NR_CPUS] to track changes to
the number of 'inuse' sockets per protocol.

If NR_CPUS is big, this means we use a big memory area for this.
Moreover, all this memory area is located on a single node on NUMA
machines, increasing memory pressure on the boot node.

In this patch, I tried to:

- Keep a fast !CONFIG_SMP implementation
- Keep a fast CONFIG_SMP implementation for often used protocols
(tcp,udp,raw,...)
- Introduce a NUMA efficient implementation

Some helper macros are defined in include/net/sock.h.
These macros take CONFIG_SMP into account.

If a "struct proto" is declared without using the DEFINE_PROTO_INUSE /
REF_PROTO_INUSE macros, it will automatically use a default implementation
based on a dynamically allocated percpu area.
This default implementation will be NUMA efficient, but might use 32/64
bytes per possible cpu because of the current alloc_percpu() implementation.
However, it should still be better than the previous implementation based on
the stats[NR_CPUS] field.

When a "struct proto" is changed to use the new macros, we use a single
static "int" percpu variable, lowering the memory and cpu costs while
still preserving NUMA efficiency.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sock.h
net/core/sock.c
net/ipv4/proc.c
net/ipv6/proc.c

index 20de3fa7ae40001bbe7771bb32915b8604f686f7..5504fb9fa88a43e9cbce0cf7e4a9db54b188ec3b 100644 (file)
@@ -560,6 +560,14 @@ struct proto {
        void                    (*unhash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);
 
+#ifdef CONFIG_SMP
+       /* Keeping track of sockets in use */
+       void                    (*inuse_add)(struct proto *prot, int inc);
+       int                     (*inuse_getval)(const struct proto *prot);
+       int                     *inuse_ptr;
+#else
+       int                     inuse;
+#endif
        /* Memory pressure */
        void                    (*enter_memory_pressure)(void);
        atomic_t                *memory_allocated;      /* Current allocated memory. */
@@ -592,12 +600,38 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
        atomic_t                socks;
 #endif
-       struct {
-               int inuse;
-               u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
-       } stats[NR_CPUS];
 };
 
+/*
+ * Special macros to let protos use a fast version of inuse{get|add}
+ * using a static percpu variable per proto instead of an allocated one,
+ * saving one dereference.
+ * This might be changed if/when dynamic percpu vars become fast.
+ */
+#ifdef CONFIG_SMP
+# define DEFINE_PROTO_INUSE(NAME)                      \
+static DEFINE_PER_CPU(int, NAME##_inuse);              \
+static void NAME##_inuse_add(struct proto *prot, int inc)      \
+{                                                      \
+       __get_cpu_var(NAME##_inuse) += inc;             \
+}                                                      \
+                                                       \
+static int NAME##_inuse_getval(const struct proto *prot)\
+{                                                      \
+       int res = 0, cpu;                               \
+                                                       \
+       for_each_possible_cpu(cpu)                      \
+               res += per_cpu(NAME##_inuse, cpu);      \
+       return res;                                     \
+}
+# define REF_PROTO_INUSE(NAME)                         \
+       .inuse_add = NAME##_inuse_add,                  \
+       .inuse_getval = NAME##_inuse_getval,
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+#endif
+
 extern int proto_register(struct proto *prot, int alloc_slab);
 extern void proto_unregister(struct proto *prot);
 
@@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 /* Called with local bh disabled */
 static __inline__ void sock_prot_inc_use(struct proto *prot)
 {
-       prot->stats[smp_processor_id()].inuse++;
+#ifdef CONFIG_SMP
+       prot->inuse_add(prot, 1);
+#else
+       prot->inuse++;
+#endif
 }
 
 static __inline__ void sock_prot_dec_use(struct proto *prot)
 {
-       prot->stats[smp_processor_id()].inuse--;
+#ifdef CONFIG_SMP
+       prot->inuse_add(prot, -1);
+#else
+       prot->inuse--;
+#endif
+}
+
+static __inline__ int sock_prot_inuse(struct proto *proto)
+{
+#ifdef CONFIG_SMP
+       return proto->inuse_getval(proto);
+#else
+       return proto->inuse;
+#endif
 }
 
 /* With per-bucket locks this operation is not-atomic, so that
index 12ad2067a988405b2768afa4eb421afd8440a1c8..e077f263b730246bad8e2fb4967828960abeb361 100644 (file)
@@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release);
 static DEFINE_RWLOCK(proto_list_lock);
 static LIST_HEAD(proto_list);
 
+#ifdef CONFIG_SMP
+/*
+ * Define default functions to keep track of inuse sockets per protocol
+ * Note that often used protocols use dedicated functions to get a speed increase.
+ * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
+ */
+static void inuse_add(struct proto *prot, int inc)
+{
+       per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
+}
+
+static int inuse_get(const struct proto *prot)
+{
+       int res = 0, cpu;
+       for_each_possible_cpu(cpu)
+               res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
+       return res;
+}
+#endif
+
 int proto_register(struct proto *prot, int alloc_slab)
 {
        char *request_sock_slab_name = NULL;
        char *timewait_sock_slab_name;
        int rc = -ENOBUFS;
 
+#ifdef CONFIG_SMP
+       if (!prot->inuse_getval || !prot->inuse_add) {
+               prot->inuse_ptr = alloc_percpu(int);
+               if (prot->inuse_ptr == NULL)
+                       goto out;
+               prot->inuse_getval = inuse_get;
+               prot->inuse_add = inuse_add;
+       }
+#endif
        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                                               SLAB_HWCACHE_ALIGN, NULL);
@@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab)
                if (prot->slab == NULL) {
                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
                               prot->name);
-                       goto out;
+                       goto out_free_inuse;
                }
 
                if (prot->rsk_prot != NULL) {
@@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name:
 out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
+out_free_inuse:
+#ifdef CONFIG_SMP
+       if (prot->inuse_ptr != NULL) {
+               free_percpu(prot->inuse_ptr);
+               prot->inuse_ptr = NULL;
+               prot->inuse_getval = NULL;
+               prot->inuse_add = NULL;
+       }
+#endif
        goto out;
 }
 
@@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot)
        list_del(&prot->node);
        write_unlock(&proto_list_lock);
 
+#ifdef CONFIG_SMP
+       if (prot->inuse_ptr != NULL) {
+               free_percpu(prot->inuse_ptr);
+               prot->inuse_ptr = NULL;
+               prot->inuse_getval = NULL;
+               prot->inuse_add = NULL;
+       }
+#endif
        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
                prot->slab = NULL;
index ffdccc0972e039d52dd5e236ee71f677fb4474f2..ce34b281803f479ba29196aafaad416a01441191 100644 (file)
 #include <net/sock.h>
 #include <net/raw.h>
 
-static int fold_prot_inuse(struct proto *proto)
-{
-       int res = 0;
-       int cpu;
-
-       for_each_possible_cpu(cpu)
-               res += proto->stats[cpu].inuse;
-
-       return res;
-}
-
 /*
  *     Report socket allocation statistics [mea@utu.fi]
  */
@@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
        socket_seq_show(seq);
        seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
-                  fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+                  sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
                   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
                   atomic_read(&tcp_memory_allocated));
-       seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
-       seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
-       seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+       seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
+       seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
+       seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
        seq_printf(seq,  "FRAG: inuse %d memory %d\n",
                        ip_frag_nqueues(), ip_frag_mem());
        return 0;
index be526ad925431b96040e26683ae63f8d2426b203..8631ed7fe8a9a84b65b398c9557e1f45ad20e220 100644 (file)
 
 static struct proc_dir_entry *proc_net_devsnmp6;
 
-static int fold_prot_inuse(struct proto *proto)
-{
-       int res = 0;
-       int cpu;
-
-       for_each_possible_cpu(cpu)
-               res += proto->stats[cpu].inuse;
-
-       return res;
-}
-
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
        seq_printf(seq, "TCP6: inuse %d\n",
-                      fold_prot_inuse(&tcpv6_prot));
+                      sock_prot_inuse(&tcpv6_prot));
        seq_printf(seq, "UDP6: inuse %d\n",
-                      fold_prot_inuse(&udpv6_prot));
+                      sock_prot_inuse(&udpv6_prot));
        seq_printf(seq, "UDPLITE6: inuse %d\n",
-                       fold_prot_inuse(&udplitev6_prot));
+                       sock_prot_inuse(&udplitev6_prot));
        seq_printf(seq, "RAW6: inuse %d\n",
-                      fold_prot_inuse(&rawv6_prot));
+                      sock_prot_inuse(&rawv6_prot));
        seq_printf(seq, "FRAG6: inuse %d memory %d\n",
                       ip6_frag_nqueues(), ip6_frag_mem());
        return 0;