X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv6%2Fnetfilter%2Fip6_tables.c;h=ea43ef1d94a75de4f9278c00b2c5347cd28b6399;hb=14c850212ed8f8cbb5972ad6b8812e08a0bc901c;hp=7d492226c16e8fef4c414872a0916e40e59d9fcc;hpb=dad2ad82c5f058367df79de022bd12d36afcd065;p=linux-2.6 diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7d492226c1..ea43ef1d94 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -15,6 +15,7 @@ * - new extension header parser code */ #include +#include #include #include #include @@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex); context stops packets coming through and allows user context to read the counters or update the rules. - To be cache friendly on SMP, we arrange them like so: - [ n-entries ] - ... cache-align padding ... - [ n-entries ] - Hence the start of any table is given by get_table() below. */ /* The table itself */ @@ -108,20 +104,15 @@ struct ip6t_table_info unsigned int underflow[NF_IP6_NUMHOOKS]; /* ip6t_entry tables: one per CPU */ - char entries[0] ____cacheline_aligned; + void *entries[NR_CPUS]; }; static LIST_HEAD(ip6t_target); static LIST_HEAD(ip6t_match); static LIST_HEAD(ip6t_tables); +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - #if 0 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) @@ -376,8 +367,7 @@ ip6t_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); + table_base = (void *)table->private->entries[smp_processor_id()]; e = get_entry(table_base, table->private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG @@ -649,7 +639,8 @@ unconditional(const struct ip6t_ip6 *ipv6) /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) +mark_source_chains(struct ip6t_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -658,7 +649,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e - = (struct ip6t_entry *)(newinfo->entries + pos); + = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -708,13 +699,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) goto next; e = (struct ip6t_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct ip6t_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -731,7 +722,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) newpos = pos + e->next_offset; } e = (struct ip6t_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -941,6 +932,7 @@ static int translate_table(const char *name, unsigned int valid_hooks, struct ip6t_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -961,11 +953,11 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); if (ret != 0) return ret; @@ -993,27 +985,24 @@ translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) + if (!mark_source_chains(newinfo, valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ for_each_cpu(i) { - if (i == 0) - continue; - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; @@ -1029,15 +1018,12 @@ replace_table(struct ip6t_table *table, #ifdef CONFIG_NETFILTER_DEBUG { - struct ip6t_entry *table_base; - unsigned int i; + int cpu; - for_each_cpu(i) { - table_base = - (void *)newinfo->entries - + TABLE_OFFSET(newinfo, i); - - table_base->comefrom = 0xdead57ac; + for_each_cpu(cpu) { + struct ip6t_entry *table_base = newinfo->entries[cpu]; + if (table_base) + table_base->comefrom = 0xdead57ac; } } #endif @@ -1072,16 +1058,44 @@ add_entry_to_counter(const struct ip6t_entry *e, return 0; } +static inline int +set_entry_to_counter(const struct ip6t_entry *e, + struct ip6t_counters total[], + unsigned int *i) +{ + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + static void get_counters(const struct ip6t_table_info *t, struct ip6t_counters counters[]) { unsigned int cpu; unsigned int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We dont care about preemption here. + */ + curcpu = raw_smp_processor_id(); + + i = 0; + IP6T_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -1098,6 +1112,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry *e; struct ip6t_counters *counters; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1109,13 +1124,13 @@ copy_entries_to_user(unsigned int total_size, return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); get_counters(table->private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + /* choose the copy that is on ourc node/cpu */ + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -1127,7 +1142,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry_match *m; struct ip6t_entry_target *t; - e = (struct ip6t_entry *)(table->private->entries + off); + e = (struct ip6t_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -1196,6 +1211,46 @@ get_entries(const struct ip6t_get_entries *entries, return ret; } +static void free_table_info(struct ip6t_table_info *info) +{ + int cpu; + for_each_cpu(cpu) { + if (info->size <= PAGE_SIZE) + kfree(info->entries[cpu]); + else + vfree(info->entries[cpu]); + } + kfree(info); +} + +static struct ip6t_table_info *alloc_table_info(unsigned int size) +{ + struct ip6t_table_info *newinfo; + int cpu; + + newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL); + if (!newinfo) + return NULL; + + newinfo->size = size; + + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, + GFP_KERNEL, + cpu_to_node(cpu)); + else + newinfo->entries[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + if (newinfo->entries[cpu] == NULL) { + free_table_info(newinfo); + return NULL; + } + } + + return newinfo; +} + static int do_replace(void __user *user, unsigned int len) { @@ -1204,6 +1259,7 @@ do_replace(void __user *user, unsigned int len) struct ip6t_table *t; struct ip6t_table_info *newinfo, *oldinfo; struct ip6t_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1212,13 +1268,13 @@ do_replace(void __user *user, unsigned int len) if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) return -ENOMEM; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1229,10 +1285,9 @@ do_replace(void __user *user, unsigned int len) ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; @@ -1271,8 +1326,9 @@ do_replace(void __user *user, unsigned int len) /* Get the old counters. */ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, sizeof(struct ip6t_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1284,11 +1340,11 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); up(&ip6t_mutex); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1321,6 +1377,7 @@ do_add_counters(void __user *user, unsigned int len) struct ip6t_counters_info tmp, *paddc; struct ip6t_table *t; int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1350,7 +1407,9 @@ do_add_counters(void __user *user, unsigned int len) } i = 0; - IP6T_ENTRY_ITERATE(t->private->entries, + /* Choose the copy that is on our node */ + loc_cpu_entry = t->private->entries[smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, t->private->size, add_counter_to_entry, paddc->counters, @@ -1543,28 +1602,29 @@ int ip6t_register_table(struct ip6t_table *table, struct ip6t_table_info *newinfo; static struct ip6t_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; - memcpy(newinfo->entries, repl->entries, repl->size); + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } ret = down_interruptible(&ip6t_mutex); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1593,20 +1653,23 @@ int ip6t_register_table(struct ip6t_table *table, return ret; free_unlock: - vfree(newinfo); + free_table_info(newinfo); goto unlock; } void ip6t_unregister_table(struct ip6t_table *table) { + void *loc_cpu_entry; + down(&ip6t_mutex); LIST_DELETE(&ip6t_tables, table); up(&ip6t_mutex); /* Decrease module usage counts and free resources */ - IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size, cleanup_entry, NULL); - vfree(table->private); + free_table_info(table->private); } /* Returns 1 if the port is matched by the range, 0 otherwise */ @@ -1972,7 +2035,7 @@ static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length return pos; } -static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] = +static const struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] = { { "ip6_tables_names", ip6t_get_tables }, { "ip6_tables_targets", ip6t_get_targets }, { "ip6_tables_matches", ip6t_get_matches },