X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv6%2Fnetfilter%2Fip6_tables.c;h=ea43ef1d94a75de4f9278c00b2c5347cd28b6399;hb=14c850212ed8f8cbb5972ad6b8812e08a0bc901c;hp=1cb8adb2787fc59f44c15dbf0f06ca533955e6f3;hpb=ef88b7dba2b47c70037a34a599d383462bb74bd3;p=linux-2.6 diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb278..ea43ef1d94 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2,7 +2,7 @@ * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling - * Copyright (C) 2000-2002 Netfilter core team + * Copyright (C) 2000-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -15,6 +15,7 @@ * - new extension header parser code */ #include +#include #include #include #include @@ -23,11 +24,11 @@ #include #include #include -#include #include #include #include #include +#include #include @@ -79,18 +80,12 @@ static DECLARE_MUTEX(ip6t_mutex); #define inline #endif -/* Locking is simple: we assume at worst case there will be one packet - in user context and one from bottom halves (or soft irq if Alexey's - softnet patch was applied). - +/* We keep a set of rules for each CPU, so we can avoid write-locking - them; doing a readlock_bh() stops packets coming through if we're - in user context. - - To be cache friendly on SMP, we arrange them like so: - [ n-entries ] - ... cache-align padding ... - [ n-entries ] + them in the softirq when updating the counters and therefore + only need to read-lock in the softirq; doing a write_lock_bh() in user + context stops packets coming through and allows user context to read + the counters or update the rules. Hence the start of any table is given by get_table() below. */ @@ -109,20 +104,15 @@ struct ip6t_table_info unsigned int underflow[NF_IP6_NUMHOOKS]; /* ip6t_entry tables: one per CPU */ - char entries[0] ____cacheline_aligned; + void *entries[NR_CPUS]; }; static LIST_HEAD(ip6t_target); static LIST_HEAD(ip6t_match); static LIST_HEAD(ip6t_tables); +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - #if 0 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) @@ -355,7 +345,7 @@ ip6t_do_table(struct sk_buff **pskb, struct ip6t_table *table, void *userdata) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; unsigned int protoff = 0; int hotdrop = 0; @@ -368,7 +358,6 @@ ip6t_do_table(struct sk_buff **pskb, /* Initialization */ indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -378,8 +367,7 @@ ip6t_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); + table_base = (void *)table->private->entries[smp_processor_id()]; e = get_entry(table_base, table->private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG @@ -496,75 +484,145 @@ ip6t_do_table(struct sk_buff **pskb, #endif } -/* If it succeeds, returns element and locks mutex */ -static inline void * -find_inlist_lock_noload(struct list_head *head, - const char *name, - int *error, - struct semaphore *mutex) +/* + * These are weird, but module loading must not be done with mutex + * held (since they will register), and we have to have a single + * function to use try_then_request_module(). + */ + +/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_table *find_table_lock(const char *name) { - void *ret; + struct ip6t_table *t; -#if 1 - duprintf("find_inlist: searching for `%s' in %s.\n", - name, head == &ip6t_target ? "ip6t_target" - : head == &ip6t_match ? "ip6t_match" - : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN"); -#endif + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); - *error = down_interruptible(mutex); - if (*error != 0) - return NULL; + list_for_each_entry(t, &ip6t_tables, list) + if (strcmp(t->name, name) == 0 && try_module_get(t->me)) + return t; + up(&ip6t_mutex); + return NULL; +} - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - up(mutex); +/* Find match, grabs ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_match *find_match(const char *name, u8 revision) +{ + struct ip6t_match *m; + int err = 0; + + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(m, &ip6t_match, list) { + if (strcmp(m->name, name) == 0) { + if (m->revision == revision) { + if (try_module_get(m->me)) { + up(&ip6t_mutex); + return m; + } + } else + err = -EPROTOTYPE; /* Found something. */ + } } - return ret; + up(&ip6t_mutex); + return ERR_PTR(err); } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else -static void * -find_inlist_lock(struct list_head *head, - const char *name, - const char *prefix, - int *error, - struct semaphore *mutex) +/* Find target, grabs ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_target *find_target(const char *name, u8 revision) { - void *ret; + struct ip6t_target *t; + int err = 0; - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - duprintf("find_inlist: loading `%s%s'.\n", prefix, name); - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(t, &ip6t_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision == revision) { + if (try_module_get(t->me)) { + up(&ip6t_mutex); + return t; + } + } else + err = -EPROTOTYPE; /* Found something. */ + } } + up(&ip6t_mutex); + return ERR_PTR(err); +} - return ret; +struct ip6t_target *ip6t_find_target(const char *name, u8 revision) +{ + struct ip6t_target *target; + + target = try_then_request_module(find_target(name, revision), + "ip6t_%s", name); + if (IS_ERR(target) || !target) + return NULL; + return target; } -#endif -static inline struct ip6t_table * -ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex) +static int match_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex); + struct ip6t_match *m; + int have_rev = 0; + + list_for_each_entry(m, &ip6t_match, list) { + if (strcmp(m->name, name) == 0) { + if (m->revision > *bestp) + *bestp = m->revision; + if (m->revision == revision) + have_rev = 1; + } + } + return have_rev; } -static inline struct ip6t_match * -find_match_lock(const char *name, int *error, struct semaphore *mutex) +static int target_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex); + struct ip6t_target *t; + int have_rev = 0; + + list_for_each_entry(t, &ip6t_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision > *bestp) + *bestp = t->revision; + if (t->revision == revision) + have_rev = 1; + } + } + return have_rev; } -static struct ip6t_target * -ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex) +/* Returns true or fals (if no such extension at all) */ +static inline int find_revision(const char *name, u8 revision, + int (*revfn)(const char *, u8, int *), + int *err) { - return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex); + int have_rev, best = -1; + + if (down_interruptible(&ip6t_mutex) != 0) { + *err = -EINTR; + return 1; + } + have_rev = revfn(name, revision, &best); + up(&ip6t_mutex); + + /* Nothing at all? Return 0 to try loading module. */ + if (best == -1) { + *err = -ENOENT; + return 0; + } + + *err = best; + if (!have_rev) + *err = -EPROTONOSUPPORT; + return 1; } + /* All zeroes == unconditional rule. */ static inline int unconditional(const struct ip6t_ip6 *ipv6) @@ -581,7 +639,8 @@ unconditional(const struct ip6t_ip6 *ipv6) /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) +mark_source_chains(struct ip6t_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -590,7 +649,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e - = (struct ip6t_entry *)(newinfo->entries + pos); + = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -640,13 +699,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) goto next; e = (struct ip6t_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct ip6t_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -663,7 +722,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) newpos = pos + e->next_offset; } e = (struct ip6t_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -724,20 +783,16 @@ check_match(struct ip6t_entry_match *m, unsigned int hookmask, unsigned int *i) { - int ret; struct ip6t_match *match; - match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex); - if (!match) { - // duprintf("check_match: `%s' not found\n", m->u.name); - return ret; - } - if (!try_module_get(match->me)) { - up(&ip6t_mutex); - return -ENOENT; + match = try_then_request_module(find_match(m->u.user.name, + m->u.user.revision), + "ip6t_%s", m->u.user.name); + if (IS_ERR(match) || !match) { + duprintf("check_match: `%s' not found\n", m->u.user.name); + return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - up(&ip6t_mutex); if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, m->data, @@ -775,22 +830,16 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, goto cleanup_matches; t = ip6t_get_target(e); - target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex); - if (!target) { + target = try_then_request_module(find_target(t->u.user.name, + t->u.user.revision), + "ip6t_%s", t->u.user.name); + if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); - goto cleanup_matches; - } - if (!try_module_get(target->me)) { - up(&ip6t_mutex); - ret = -ENOENT; + ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - up(&ip6t_mutex); - if (!t->u.kernel.target) { - ret = -EBUSY; - goto cleanup_matches; - } + if (t->u.kernel.target == &ip6t_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; @@ -883,6 +932,7 @@ static int translate_table(const char *name, unsigned int valid_hooks, struct ip6t_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -903,11 +953,11 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); if (ret != 0) return ret; @@ -935,25 +985,24 @@ translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) + if (!mark_source_chains(newinfo, valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + for_each_cpu(i) { + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; @@ -969,15 +1018,12 @@ replace_table(struct ip6t_table *table, #ifdef CONFIG_NETFILTER_DEBUG { - struct ip6t_entry *table_base; - unsigned int i; + int cpu; - for (i = 0; i < num_possible_cpus(); i++) { - table_base = - (void *)newinfo->entries - + TABLE_OFFSET(newinfo, i); - - table_base->comefrom = 0xdead57ac; + for_each_cpu(cpu) { + struct ip6t_entry *table_base = newinfo->entries[cpu]; + if (table_base) + table_base->comefrom = 0xdead57ac; } } #endif @@ -1012,16 +1058,44 @@ add_entry_to_counter(const struct ip6t_entry *e, return 0; } +static inline int +set_entry_to_counter(const struct ip6t_entry *e, + struct ip6t_counters total[], + unsigned int *i) +{ + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + static void get_counters(const struct ip6t_table_info *t, struct ip6t_counters counters[]) { unsigned int cpu; unsigned int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We dont care about preemption here. + */ + curcpu = raw_smp_processor_id(); - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + i = 0; + IP6T_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); + + for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -1038,6 +1112,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry *e; struct ip6t_counters *counters; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1049,13 +1124,13 @@ copy_entries_to_user(unsigned int total_size, return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); get_counters(table->private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + /* choose the copy that is on ourc node/cpu */ + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -1067,7 +1142,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry_match *m; struct ip6t_entry_target *t; - e = (struct ip6t_entry *)(table->private->entries + off); + e = (struct ip6t_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -1115,8 +1190,8 @@ get_entries(const struct ip6t_get_entries *entries, int ret; struct ip6t_table *t; - t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex); - if (t) { + t = find_table_lock(entries->name); + if (t && !IS_ERR(t)) { duprintf("t->private->number = %u\n", t->private->number); if (entries->size == t->private->size) @@ -1128,14 +1203,54 @@ get_entries(const struct ip6t_get_entries *entries, entries->size); ret = -EINVAL; } + module_put(t->me); up(&ip6t_mutex); } else - duprintf("get_entries: Can't find %s!\n", - entries->name); + ret = t ? PTR_ERR(t) : -ENOENT; return ret; } +static void free_table_info(struct ip6t_table_info *info) +{ + int cpu; + for_each_cpu(cpu) { + if (info->size <= PAGE_SIZE) + kfree(info->entries[cpu]); + else + vfree(info->entries[cpu]); + } + kfree(info); +} + +static struct ip6t_table_info *alloc_table_info(unsigned int size) +{ + struct ip6t_table_info *newinfo; + int cpu; + + newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL); + if (!newinfo) + return NULL; + + newinfo->size = size; + + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, + GFP_KERNEL, + cpu_to_node(cpu)); + else + newinfo->entries[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + if (newinfo->entries[cpu] == NULL) { + free_table_info(newinfo); + return NULL; + } + } + + return newinfo; +} + static int do_replace(void __user *user, unsigned int len) { @@ -1144,6 +1259,7 @@ do_replace(void __user *user, unsigned int len) struct ip6t_table *t; struct ip6t_table_info *newinfo, *oldinfo; struct ip6t_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1152,12 +1268,13 @@ do_replace(void __user *user, unsigned int len) if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) return -ENOMEM; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + newinfo = alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1168,32 +1285,28 @@ do_replace(void __user *user, unsigned int len) ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; duprintf("ip_tables: Translated table\n"); - t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); - if (!t) + t = try_then_request_module(find_table_lock(tmp.name), + "ip6table_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; + } /* You lied! */ if (tmp.valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", tmp.valid_hooks, t->valid_hooks); ret = -EINVAL; - goto free_newinfo_counters_untrans_unlock; - } - - /* Get a reference in advance, we're not allowed fail later */ - if (!try_module_get(t->me)) { - ret = -EBUSY; - goto free_newinfo_counters_untrans_unlock; + goto put_module; } oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); @@ -1213,9 +1326,9 @@ do_replace(void __user *user, unsigned int len) /* Get the old counters. */ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); - /* Silent error: too late now. */ + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, sizeof(struct ip6t_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1225,14 +1338,13 @@ do_replace(void __user *user, unsigned int len) put_module: module_put(t->me); - free_newinfo_counters_untrans_unlock: up(&ip6t_mutex); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1264,7 +1376,8 @@ do_add_counters(void __user *user, unsigned int len) unsigned int i; struct ip6t_counters_info tmp, *paddc; struct ip6t_table *t; - int ret; + int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1281,9 +1394,11 @@ do_add_counters(void __user *user, unsigned int len) goto free; } - t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); - if (!t) + t = find_table_lock(tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free; + } write_lock_bh(&t->lock); if (t->private->number != paddc->num_counters) { @@ -1292,7 +1407,9 @@ do_add_counters(void __user *user, unsigned int len) } i = 0; - IP6T_ENTRY_ITERATE(t->private->entries, + /* Choose the copy that is on our node */ + loc_cpu_entry = t->private->entries[smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, t->private->size, add_counter_to_entry, paddc->counters, @@ -1300,6 +1417,7 @@ do_add_counters(void __user *user, unsigned int len) unlock_up_free: write_unlock_bh(&t->lock); up(&ip6t_mutex); + module_put(t->me); free: vfree(paddc); @@ -1356,8 +1474,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) break; } name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - t = ip6t_find_table_lock(name, &ret, &ip6t_mutex); - if (t) { + + t = try_then_request_module(find_table_lock(name), + "ip6table_%s", name); + if (t && !IS_ERR(t)) { struct ip6t_getinfo info; info.valid_hooks = t->valid_hooks; @@ -1373,9 +1493,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; else ret = 0; - up(&ip6t_mutex); - } + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; } break; @@ -1396,6 +1517,31 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) break; } + case IP6T_SO_GET_REVISION_MATCH: + case IP6T_SO_GET_REVISION_TARGET: { + struct ip6t_get_revision rev; + int (*revfn)(const char *, u8, int *); + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + + if (cmd == IP6T_SO_GET_REVISION_TARGET) + revfn = target_revfn; + else + revfn = match_revfn; + + try_then_request_module(find_revision(rev.name, rev.revision, + revfn, &ret), + "ip6t_%s", rev.name); + break; + } + default: duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; @@ -1413,12 +1559,7 @@ ip6t_register_target(struct ip6t_target *target) ret = down_interruptible(&ip6t_mutex); if (ret != 0) return ret; - - if (!list_named_insert(&ip6t_target, target)) { - duprintf("ip6t_register_target: `%s' already in list!\n", - target->name); - ret = -EINVAL; - } + list_add(&target->list, &ip6t_target); up(&ip6t_mutex); return ret; } @@ -1440,11 +1581,7 @@ ip6t_register_match(struct ip6t_match *match) if (ret != 0) return ret; - if (!list_named_insert(&ip6t_match, match)) { - duprintf("ip6t_register_match: `%s' already in list!\n", - match->name); - ret = -EINVAL; - } + list_add(&match->list, &ip6t_match); up(&ip6t_mutex); return ret; @@ -1465,27 +1602,29 @@ int ip6t_register_table(struct ip6t_table *table, struct ip6t_table_info *newinfo; static struct ip6t_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + newinfo = alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; - memcpy(newinfo->entries, repl->entries, repl->size); + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } ret = down_interruptible(&ip6t_mutex); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1514,20 +1653,23 @@ int ip6t_register_table(struct ip6t_table *table, return ret; free_unlock: - vfree(newinfo); + free_table_info(newinfo); goto unlock; } void ip6t_unregister_table(struct ip6t_table *table) { + void *loc_cpu_entry; + down(&ip6t_mutex); LIST_DELETE(&ip6t_tables, table); up(&ip6t_mutex); /* Decrease module usage counts and free resources */ - IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size, cleanup_entry, NULL); - vfree(table->private); + free_table_info(table->private); } /* Returns 1 if the port is matched by the range, 0 otherwise */ @@ -1893,7 +2035,7 @@ static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length return pos; } -static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] = +static const struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] = { { "ip6_tables_names", ip6t_get_tables }, { "ip6_tables_targets", ip6t_get_targets }, { "ip6_tables_matches", ip6t_get_matches }, @@ -1955,6 +2097,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2156,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini);