err.no Git - linux-2.6/blob - net/ipv4/netfilter/ip_tables.c
[IPV6]: Don't redo xfrm_lookup for cached dst entries
[linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30
31 #include <linux/netfilter_ipv4/ip_tables.h>
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
37 /*#define DEBUG_IP_FIREWALL*/
38 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39 /*#define DEBUG_IP_FIREWALL_USER*/
40
/*
 * Debug tracing helpers.  dprintf() is used by the packet-matching code
 * and duprintf() by the rule-checking code in this file.  Each expands to
 * printk() only when its DEBUG_* switch is defined, and to nothing
 * otherwise (the arguments are then not evaluated).
 */
#if defined(DEBUG_IP_FIREWALL)
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#if defined(DEBUG_IP_FIREWALL_USER)
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
52
/*
 * IP_NF_ASSERT(x): when CONFIG_NETFILTER_DEBUG is set, print the function,
 * file and line if x evaluates false; execution continues either way.
 * Without CONFIG_NETFILTER_DEBUG it expands to nothing and x is not
 * evaluated.
 */
#if defined(CONFIG_NETFILTER_DEBUG)
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (x)                                                  \
                break;                                          \
        printk("IP_NF_ASSERT: %s:%s:%u\n",                      \
               __FUNCTION__, __FILE__, __LINE__);               \
} while (0)
#else
#define IP_NF_ASSERT(x)
#endif

/* Round x up to a multiple of SMP_CACHE_BYTES (assumed a power of two). */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64
65 static DECLARE_MUTEX(ipt_mutex);
66
67 /* Must have mutex */
68 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #include <linux/netfilter_ipv4/listhelp.h>
71
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79    We keep a set of rules for each CPU, so we can avoid write-locking
80    them in the softirq when updating the counters and therefore
81    only need to read-lock in the softirq; doing a write_lock_bh() in user
82    context stops packets coming through and allows user context to read
83    the counters or update the rules.
84
85    To be cache friendly on SMP, we arrange them like so:
86    [ n-entries ]
87    ... cache-align padding ...
88    [ n-entries ]
89
90    Hence the start of any table is given by get_table() below.  */
91
92 /* The table itself */
93 struct ipt_table_info
94 {
95         /* Size per table */
96         unsigned int size;
97         /* Number of entries: FIXME. --RR */
98         unsigned int number;
99         /* Initial number of entries. Needed for module usage count */
100         unsigned int initial_entries;
101
102         /* Entry points and underflows */
103         unsigned int hook_entry[NF_IP_NUMHOOKS];
104         unsigned int underflow[NF_IP_NUMHOOKS];
105
106         /* ipt_entry tables: one per CPU */
107         char entries[0] ____cacheline_aligned;
108 };
109
/* Registries of known targets, matches and tables. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);

/* Add b bytes and p packets to counter c. */
#define ADD_COUNTER(c,b,p)              \
do {                                    \
        (c).bcnt += (b);                \
        (c).pcnt += (p);                \
} while (0)

/*
 * Byte offset of CPU p's copy of table t inside t->entries.  Copies are
 * spaced SMP_ALIGN(size) apart on SMP; on UP there is a single copy at
 * offset 0.
 */
#ifndef CONFIG_SMP
#define TABLE_OFFSET(t,p) 0
#else
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#endif
120
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130                 const char *indev,
131                 const char *outdev,
132                 const struct ipt_ip *ipinfo,
133                 int isfrag)
134 {
135         size_t i;
136         unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
139
140         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141                   IPT_INV_SRCIP)
142             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143                      IPT_INV_DSTIP)) {
144                 dprintf("Source or dest mismatch.\n");
145
146                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147                         NIPQUAD(ip->saddr),
148                         NIPQUAD(ipinfo->smsk.s_addr),
149                         NIPQUAD(ipinfo->src.s_addr),
150                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152                         NIPQUAD(ip->daddr),
153                         NIPQUAD(ipinfo->dmsk.s_addr),
154                         NIPQUAD(ipinfo->dst.s_addr),
155                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156                 return 0;
157         }
158
159         /* Look for ifname matches; this should unroll nicely. */
160         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161                 ret |= (((const unsigned long *)indev)[i]
162                         ^ ((const unsigned long *)ipinfo->iniface)[i])
163                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
164         }
165
166         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167                 dprintf("VIA in mismatch (%s vs %s).%s\n",
168                         indev, ipinfo->iniface,
169                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170                 return 0;
171         }
172
173         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174                 ret |= (((const unsigned long *)outdev)[i]
175                         ^ ((const unsigned long *)ipinfo->outiface)[i])
176                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
177         }
178
179         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180                 dprintf("VIA out mismatch (%s vs %s).%s\n",
181                         outdev, ipinfo->outiface,
182                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183                 return 0;
184         }
185
186         /* Check specific protocol */
187         if (ipinfo->proto
188             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189                 dprintf("Packet protocol %hi does not match %hi.%s\n",
190                         ip->protocol, ipinfo->proto,
191                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192                 return 0;
193         }
194
195         /* If we have a fragment rule but the packet is not a fragment
196          * then we return zero */
197         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198                 dprintf("Fragment rule but not fragment.%s\n",
199                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200                 return 0;
201         }
202
203         return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209         if (ip->flags & ~IPT_F_MASK) {
210                 duprintf("Unknown flag bits set: %08X\n",
211                          ip->flags & ~IPT_F_MASK);
212                 return 0;
213         }
214         if (ip->invflags & ~IPT_INV_MASK) {
215                 duprintf("Unknown invflag bits set: %08X\n",
216                          ip->invflags & ~IPT_INV_MASK);
217                 return 0;
218         }
219         return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224           const struct net_device *in,
225           const struct net_device *out,
226           unsigned int hooknum,
227           const void *targinfo,
228           void *userinfo)
229 {
230         if (net_ratelimit())
231                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233         return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238              const struct sk_buff *skb,
239              const struct net_device *in,
240              const struct net_device *out,
241              int offset,
242              int *hotdrop)
243 {
244         /* Stop iteration if it doesn't match */
245         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246                 return 1;
247         else
248                 return 0;
249 }
250
/* Return the rule located `offset` bytes past `base`. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ipt_entry *)(start + offset);
}
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260              unsigned int hook,
261              const struct net_device *in,
262              const struct net_device *out,
263              struct ipt_table *table,
264              void *userdata)
265 {
266         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267         u_int16_t offset;
268         struct iphdr *ip;
269         u_int16_t datalen;
270         int hotdrop = 0;
271         /* Initializing verdict to NF_DROP keeps gcc happy. */
272         unsigned int verdict = NF_DROP;
273         const char *indev, *outdev;
274         void *table_base;
275         struct ipt_entry *e, *back;
276
277         /* Initialization */
278         ip = (*pskb)->nh.iph;
279         datalen = (*pskb)->len - ip->ihl * 4;
280         indev = in ? in->name : nulldevname;
281         outdev = out ? out->name : nulldevname;
282         /* We handle fragments by dealing with the first fragment as
283          * if it was a normal packet.  All other fragments are treated
284          * normally, except that they will NEVER match rules that ask
285          * things we don't know, ie. tcp syn flag or ports).  If the
286          * rule is also a fragment-specific rule, non-fragments won't
287          * match it. */
288         offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290         read_lock_bh(&table->lock);
291         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292         table_base = (void *)table->private->entries
293                 + TABLE_OFFSET(table->private, smp_processor_id());
294         e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297         /* Check noone else using our table */
298         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301                        smp_processor_id(),
302                        table->name,
303                        &((struct ipt_entry *)table_base)->comefrom,
304                        ((struct ipt_entry *)table_base)->comefrom);
305         }
306         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309         /* For return from builtin chain */
310         back = get_entry(table_base, table->private->underflow[hook]);
311
312         do {
313                 IP_NF_ASSERT(e);
314                 IP_NF_ASSERT(back);
315                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
316                         struct ipt_entry_target *t;
317
318                         if (IPT_MATCH_ITERATE(e, do_match,
319                                               *pskb, in, out,
320                                               offset, &hotdrop) != 0)
321                                 goto no_match;
322
323                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
324
325                         t = ipt_get_target(e);
326                         IP_NF_ASSERT(t->u.kernel.target);
327                         /* Standard target? */
328                         if (!t->u.kernel.target->target) {
329                                 int v;
330
331                                 v = ((struct ipt_standard_target *)t)->verdict;
332                                 if (v < 0) {
333                                         /* Pop from stack? */
334                                         if (v != IPT_RETURN) {
335                                                 verdict = (unsigned)(-v) - 1;
336                                                 break;
337                                         }
338                                         e = back;
339                                         back = get_entry(table_base,
340                                                          back->comefrom);
341                                         continue;
342                                 }
343                                 if (table_base + v != (void *)e + e->next_offset
344                                     && !(e->ip.flags & IPT_F_GOTO)) {
345                                         /* Save old back ptr in next entry */
346                                         struct ipt_entry *next
347                                                 = (void *)e + e->next_offset;
348                                         next->comefrom
349                                                 = (void *)back - table_base;
350                                         /* set back pointer to next entry */
351                                         back = next;
352                                 }
353
354                                 e = get_entry(table_base, v);
355                         } else {
356                                 /* Targets which reenter must return
357                                    abs. verdicts */
358 #ifdef CONFIG_NETFILTER_DEBUG
359                                 ((struct ipt_entry *)table_base)->comefrom
360                                         = 0xeeeeeeec;
361 #endif
362                                 verdict = t->u.kernel.target->target(pskb,
363                                                                      in, out,
364                                                                      hook,
365                                                                      t->data,
366                                                                      userdata);
367
368 #ifdef CONFIG_NETFILTER_DEBUG
369                                 if (((struct ipt_entry *)table_base)->comefrom
370                                     != 0xeeeeeeec
371                                     && verdict == IPT_CONTINUE) {
372                                         printk("Target %s reentered!\n",
373                                                t->u.kernel.target->name);
374                                         verdict = NF_DROP;
375                                 }
376                                 ((struct ipt_entry *)table_base)->comefrom
377                                         = 0x57acc001;
378 #endif
379                                 /* Target might have changed stuff. */
380                                 ip = (*pskb)->nh.iph;
381                                 datalen = (*pskb)->len - ip->ihl * 4;
382
383                                 if (verdict == IPT_CONTINUE)
384                                         e = (void *)e + e->next_offset;
385                                 else
386                                         /* Verdict */
387                                         break;
388                         }
389                 } else {
390
391                 no_match:
392                         e = (void *)e + e->next_offset;
393                 }
394         } while (!hotdrop);
395
396 #ifdef CONFIG_NETFILTER_DEBUG
397         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
398 #endif
399         read_unlock_bh(&table->lock);
400
401 #ifdef DEBUG_ALLOW_ALL
402         return NF_ACCEPT;
403 #else
404         if (hotdrop)
405                 return NF_DROP;
406         else return verdict;
407 #endif
408 }
409
410 /*
411  * These are weird, but module loading must not be done with mutex
412  * held (since they will register), and we have to have a single
413  * function to use try_then_request_module().
414  */
415
416 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
417 static inline struct ipt_table *find_table_lock(const char *name)
418 {
419         struct ipt_table *t;
420
421         if (down_interruptible(&ipt_mutex) != 0)
422                 return ERR_PTR(-EINTR);
423
424         list_for_each_entry(t, &ipt_tables, list)
425                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
426                         return t;
427         up(&ipt_mutex);
428         return NULL;
429 }
430
431 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
432 static inline struct ipt_match *find_match(const char *name, u8 revision)
433 {
434         struct ipt_match *m;
435         int err = 0;
436
437         if (down_interruptible(&ipt_mutex) != 0)
438                 return ERR_PTR(-EINTR);
439
440         list_for_each_entry(m, &ipt_match, list) {
441                 if (strcmp(m->name, name) == 0) {
442                         if (m->revision == revision) {
443                                 if (try_module_get(m->me)) {
444                                         up(&ipt_mutex);
445                                         return m;
446                                 }
447                         } else
448                                 err = -EPROTOTYPE; /* Found something. */
449                 }
450         }
451         up(&ipt_mutex);
452         return ERR_PTR(err);
453 }
454
455 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
456 static inline struct ipt_target *find_target(const char *name, u8 revision)
457 {
458         struct ipt_target *t;
459         int err = 0;
460
461         if (down_interruptible(&ipt_mutex) != 0)
462                 return ERR_PTR(-EINTR);
463
464         list_for_each_entry(t, &ipt_target, list) {
465                 if (strcmp(t->name, name) == 0) {
466                         if (t->revision == revision) {
467                                 if (try_module_get(t->me)) {
468                                         up(&ipt_mutex);
469                                         return t;
470                                 }
471                         } else
472                                 err = -EPROTOTYPE; /* Found something. */
473                 }
474         }
475         up(&ipt_mutex);
476         return ERR_PTR(err);
477 }
478
479 struct ipt_target *ipt_find_target(const char *name, u8 revision)
480 {
481         struct ipt_target *target;
482
483         target = try_then_request_module(find_target(name, revision),
484                                          "ipt_%s", name);
485         if (IS_ERR(target) || !target)
486                 return NULL;
487         return target;
488 }
489
490 static int match_revfn(const char *name, u8 revision, int *bestp)
491 {
492         struct ipt_match *m;
493         int have_rev = 0;
494
495         list_for_each_entry(m, &ipt_match, list) {
496                 if (strcmp(m->name, name) == 0) {
497                         if (m->revision > *bestp)
498                                 *bestp = m->revision;
499                         if (m->revision == revision)
500                                 have_rev = 1;
501                 }
502         }
503         return have_rev;
504 }
505
506 static int target_revfn(const char *name, u8 revision, int *bestp)
507 {
508         struct ipt_target *t;
509         int have_rev = 0;
510
511         list_for_each_entry(t, &ipt_target, list) {
512                 if (strcmp(t->name, name) == 0) {
513                         if (t->revision > *bestp)
514                                 *bestp = t->revision;
515                         if (t->revision == revision)
516                                 have_rev = 1;
517                 }
518         }
519         return have_rev;
520 }
521
522 /* Returns true or false (if no such extension at all) */
523 static inline int find_revision(const char *name, u8 revision,
524                                 int (*revfn)(const char *, u8, int *),
525                                 int *err)
526 {
527         int have_rev, best = -1;
528
529         if (down_interruptible(&ipt_mutex) != 0) {
530                 *err = -EINTR;
531                 return 1;
532         }
533         have_rev = revfn(name, revision, &best);
534         up(&ipt_mutex);
535
536         /* Nothing at all?  Return 0 to try loading module. */
537         if (best == -1) {
538                 *err = -ENOENT;
539                 return 0;
540         }
541
542         *err = best;
543         if (!have_rev)
544                 *err = -EPROTONOSUPPORT;
545         return 1;
546 }
547
548
549 /* All zeroes == unconditional rule. */
550 static inline int
551 unconditional(const struct ipt_ip *ip)
552 {
553         unsigned int i;
554
555         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
556                 if (((__u32 *)ip)[i])
557                         return 0;
558
559         return 1;
560 }
561
562 /* Figures out from what hook each rule can be called: returns 0 if
563    there are loops.  Puts hook bitmask in comefrom. */
564 static int
565 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
566 {
567         unsigned int hook;
568
569         /* No recursion; use packet counter to save back ptrs (reset
570            to 0 as we leave), and comefrom to save source hook bitmask */
571         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
572                 unsigned int pos = newinfo->hook_entry[hook];
573                 struct ipt_entry *e
574                         = (struct ipt_entry *)(newinfo->entries + pos);
575
576                 if (!(valid_hooks & (1 << hook)))
577                         continue;
578
579                 /* Set initial back pointer. */
580                 e->counters.pcnt = pos;
581
582                 for (;;) {
583                         struct ipt_standard_target *t
584                                 = (void *)ipt_get_target(e);
585
586                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
587                                 printk("iptables: loop hook %u pos %u %08X.\n",
588                                        hook, pos, e->comefrom);
589                                 return 0;
590                         }
591                         e->comefrom
592                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
593
594                         /* Unconditional return/END. */
595                         if (e->target_offset == sizeof(struct ipt_entry)
596                             && (strcmp(t->target.u.user.name,
597                                        IPT_STANDARD_TARGET) == 0)
598                             && t->verdict < 0
599                             && unconditional(&e->ip)) {
600                                 unsigned int oldpos, size;
601
602                                 /* Return: backtrack through the last
603                                    big jump. */
604                                 do {
605                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
606 #ifdef DEBUG_IP_FIREWALL_USER
607                                         if (e->comefrom
608                                             & (1 << NF_IP_NUMHOOKS)) {
609                                                 duprintf("Back unset "
610                                                          "on hook %u "
611                                                          "rule %u\n",
612                                                          hook, pos);
613                                         }
614 #endif
615                                         oldpos = pos;
616                                         pos = e->counters.pcnt;
617                                         e->counters.pcnt = 0;
618
619                                         /* We're at the start. */
620                                         if (pos == oldpos)
621                                                 goto next;
622
623                                         e = (struct ipt_entry *)
624                                                 (newinfo->entries + pos);
625                                 } while (oldpos == pos + e->next_offset);
626
627                                 /* Move along one */
628                                 size = e->next_offset;
629                                 e = (struct ipt_entry *)
630                                         (newinfo->entries + pos + size);
631                                 e->counters.pcnt = pos;
632                                 pos += size;
633                         } else {
634                                 int newpos = t->verdict;
635
636                                 if (strcmp(t->target.u.user.name,
637                                            IPT_STANDARD_TARGET) == 0
638                                     && newpos >= 0) {
639                                         /* This a jump; chase it. */
640                                         duprintf("Jump rule %u -> %u\n",
641                                                  pos, newpos);
642                                 } else {
643                                         /* ... this is a fallthru */
644                                         newpos = pos + e->next_offset;
645                                 }
646                                 e = (struct ipt_entry *)
647                                         (newinfo->entries + newpos);
648                                 e->counters.pcnt = pos;
649                                 pos = newpos;
650                         }
651                 }
652                 next:
653                 duprintf("Finished chain %u\n", hook);
654         }
655         return 1;
656 }
657
658 static inline int
659 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
660 {
661         if (i && (*i)-- == 0)
662                 return 1;
663
664         if (m->u.kernel.match->destroy)
665                 m->u.kernel.match->destroy(m->data,
666                                            m->u.match_size - sizeof(*m));
667         module_put(m->u.kernel.match->me);
668         return 0;
669 }
670
671 static inline int
672 standard_check(const struct ipt_entry_target *t,
673                unsigned int max_offset)
674 {
675         struct ipt_standard_target *targ = (void *)t;
676
677         /* Check standard info. */
678         if (t->u.target_size
679             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
680                 duprintf("standard_check: target size %u != %u\n",
681                          t->u.target_size,
682                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
683                 return 0;
684         }
685
686         if (targ->verdict >= 0
687             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
688                 duprintf("ipt_standard_check: bad verdict (%i)\n",
689                          targ->verdict);
690                 return 0;
691         }
692
693         if (targ->verdict < -NF_MAX_VERDICT - 1) {
694                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
695                          targ->verdict);
696                 return 0;
697         }
698         return 1;
699 }
700
701 static inline int
702 check_match(struct ipt_entry_match *m,
703             const char *name,
704             const struct ipt_ip *ip,
705             unsigned int hookmask,
706             unsigned int *i)
707 {
708         struct ipt_match *match;
709
710         match = try_then_request_module(find_match(m->u.user.name,
711                                                    m->u.user.revision),
712                                         "ipt_%s", m->u.user.name);
713         if (IS_ERR(match) || !match) {
714                 duprintf("check_match: `%s' not found\n", m->u.user.name);
715                 return match ? PTR_ERR(match) : -ENOENT;
716         }
717         m->u.kernel.match = match;
718
719         if (m->u.kernel.match->checkentry
720             && !m->u.kernel.match->checkentry(name, ip, m->data,
721                                               m->u.match_size - sizeof(*m),
722                                               hookmask)) {
723                 module_put(m->u.kernel.match->me);
724                 duprintf("ip_tables: check failed for `%s'.\n",
725                          m->u.kernel.match->name);
726                 return -EINVAL;
727         }
728
729         (*i)++;
730         return 0;
731 }
732
733 static struct ipt_target ipt_standard_target;
734
735 static inline int
736 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
737             unsigned int *i)
738 {
739         struct ipt_entry_target *t;
740         struct ipt_target *target;
741         int ret;
742         unsigned int j;
743
744         if (!ip_checkentry(&e->ip)) {
745                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
746                 return -EINVAL;
747         }
748
749         j = 0;
750         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
751         if (ret != 0)
752                 goto cleanup_matches;
753
754         t = ipt_get_target(e);
755         target = try_then_request_module(find_target(t->u.user.name,
756                                                      t->u.user.revision),
757                                          "ipt_%s", t->u.user.name);
758         if (IS_ERR(target) || !target) {
759                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
760                 ret = target ? PTR_ERR(target) : -ENOENT;
761                 goto cleanup_matches;
762         }
763         t->u.kernel.target = target;
764
765         if (t->u.kernel.target == &ipt_standard_target) {
766                 if (!standard_check(t, size)) {
767                         ret = -EINVAL;
768                         goto cleanup_matches;
769                 }
770         } else if (t->u.kernel.target->checkentry
771                    && !t->u.kernel.target->checkentry(name, e, t->data,
772                                                       t->u.target_size
773                                                       - sizeof(*t),
774                                                       e->comefrom)) {
775                 module_put(t->u.kernel.target->me);
776                 duprintf("ip_tables: check failed for `%s'.\n",
777                          t->u.kernel.target->name);
778                 ret = -EINVAL;
779                 goto cleanup_matches;
780         }
781
782         (*i)++;
783         return 0;
784
785  cleanup_matches:
786         IPT_MATCH_ITERATE(e, cleanup_match, &j);
787         return ret;
788 }
789
790 static inline int
791 check_entry_size_and_hooks(struct ipt_entry *e,
792                            struct ipt_table_info *newinfo,
793                            unsigned char *base,
794                            unsigned char *limit,
795                            const unsigned int *hook_entries,
796                            const unsigned int *underflows,
797                            unsigned int *i)
798 {
799         unsigned int h;
800
801         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
802             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
803                 duprintf("Bad offset %p\n", e);
804                 return -EINVAL;
805         }
806
807         if (e->next_offset
808             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
809                 duprintf("checking: element %p size %u\n",
810                          e, e->next_offset);
811                 return -EINVAL;
812         }
813
814         /* Check hooks & underflows */
815         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
816                 if ((unsigned char *)e - base == hook_entries[h])
817                         newinfo->hook_entry[h] = hook_entries[h];
818                 if ((unsigned char *)e - base == underflows[h])
819                         newinfo->underflow[h] = underflows[h];
820         }
821
822         /* FIXME: underflows must be unconditional, standard verdicts
823            < 0 (not IPT_RETURN). --RR */
824
825         /* Clear counters and comefrom */
826         e->counters = ((struct ipt_counters) { 0, 0 });
827         e->comefrom = 0;
828
829         (*i)++;
830         return 0;
831 }
832
833 static inline int
834 cleanup_entry(struct ipt_entry *e, unsigned int *i)
835 {
836         struct ipt_entry_target *t;
837
838         if (i && (*i)-- == 0)
839                 return 1;
840
841         /* Cleanup all matches */
842         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
843         t = ipt_get_target(e);
844         if (t->u.kernel.target->destroy)
845                 t->u.kernel.target->destroy(t->data,
846                                             t->u.target_size - sizeof(*t));
847         module_put(t->u.kernel.target->me);
848         return 0;
849 }
850
851 /* Checks and translates the user-supplied table segment (held in
852    newinfo) */
853 static int
854 translate_table(const char *name,
855                 unsigned int valid_hooks,
856                 struct ipt_table_info *newinfo,
857                 unsigned int size,
858                 unsigned int number,
859                 const unsigned int *hook_entries,
860                 const unsigned int *underflows)
861 {
862         unsigned int i;
863         int ret;
864
865         newinfo->size = size;
866         newinfo->number = number;
867
868         /* Init all hooks to impossible value. */
869         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
870                 newinfo->hook_entry[i] = 0xFFFFFFFF;
871                 newinfo->underflow[i] = 0xFFFFFFFF;
872         }
873
874         duprintf("translate_table: size %u\n", newinfo->size);
875         i = 0;
876         /* Walk through entries, checking offsets. */
877         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
878                                 check_entry_size_and_hooks,
879                                 newinfo,
880                                 newinfo->entries,
881                                 newinfo->entries + size,
882                                 hook_entries, underflows, &i);
883         if (ret != 0)
884                 return ret;
885
886         if (i != number) {
887                 duprintf("translate_table: %u not %u entries\n",
888                          i, number);
889                 return -EINVAL;
890         }
891
892         /* Check hooks all assigned */
893         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
894                 /* Only hooks which are valid */
895                 if (!(valid_hooks & (1 << i)))
896                         continue;
897                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
898                         duprintf("Invalid hook entry %u %u\n",
899                                  i, hook_entries[i]);
900                         return -EINVAL;
901                 }
902                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
903                         duprintf("Invalid underflow %u %u\n",
904                                  i, underflows[i]);
905                         return -EINVAL;
906                 }
907         }
908
909         if (!mark_source_chains(newinfo, valid_hooks))
910                 return -ELOOP;
911
912         /* Finally, each sanity check must pass */
913         i = 0;
914         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
915                                 check_entry, name, size, &i);
916
917         if (ret != 0) {
918                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
919                                   cleanup_entry, &i);
920                 return ret;
921         }
922
923         /* And one copy for every other CPU */
924         for (i = 1; i < num_possible_cpus(); i++) {
925                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
926                        newinfo->entries,
927                        SMP_ALIGN(newinfo->size));
928         }
929
930         return ret;
931 }
932
933 static struct ipt_table_info *
934 replace_table(struct ipt_table *table,
935               unsigned int num_counters,
936               struct ipt_table_info *newinfo,
937               int *error)
938 {
939         struct ipt_table_info *oldinfo;
940
941 #ifdef CONFIG_NETFILTER_DEBUG
942         {
943                 struct ipt_entry *table_base;
944                 unsigned int i;
945
946                 for (i = 0; i < num_possible_cpus(); i++) {
947                         table_base =
948                                 (void *)newinfo->entries
949                                 + TABLE_OFFSET(newinfo, i);
950
951                         table_base->comefrom = 0xdead57ac;
952                 }
953         }
954 #endif
955
956         /* Do the substitution. */
957         write_lock_bh(&table->lock);
958         /* Check inside lock: is the old number correct? */
959         if (num_counters != table->private->number) {
960                 duprintf("num_counters != table->private->number (%u/%u)\n",
961                          num_counters, table->private->number);
962                 write_unlock_bh(&table->lock);
963                 *error = -EAGAIN;
964                 return NULL;
965         }
966         oldinfo = table->private;
967         table->private = newinfo;
968         newinfo->initial_entries = oldinfo->initial_entries;
969         write_unlock_bh(&table->lock);
970
971         return oldinfo;
972 }
973
974 /* Gets counters. */
975 static inline int
976 add_entry_to_counter(const struct ipt_entry *e,
977                      struct ipt_counters total[],
978                      unsigned int *i)
979 {
980         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
981
982         (*i)++;
983         return 0;
984 }
985
986 static void
987 get_counters(const struct ipt_table_info *t,
988              struct ipt_counters counters[])
989 {
990         unsigned int cpu;
991         unsigned int i;
992
993         for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
994                 i = 0;
995                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
996                                   t->size,
997                                   add_entry_to_counter,
998                                   counters,
999                                   &i);
1000         }
1001 }
1002
1003 static int
1004 copy_entries_to_user(unsigned int total_size,
1005                      struct ipt_table *table,
1006                      void __user *userptr)
1007 {
1008         unsigned int off, num, countersize;
1009         struct ipt_entry *e;
1010         struct ipt_counters *counters;
1011         int ret = 0;
1012
1013         /* We need atomic snapshot of counters: rest doesn't change
1014            (other than comefrom, which userspace doesn't care
1015            about). */
1016         countersize = sizeof(struct ipt_counters) * table->private->number;
1017         counters = vmalloc(countersize);
1018
1019         if (counters == NULL)
1020                 return -ENOMEM;
1021
1022         /* First, sum counters... */
1023         memset(counters, 0, countersize);
1024         write_lock_bh(&table->lock);
1025         get_counters(table->private, counters);
1026         write_unlock_bh(&table->lock);
1027
1028         /* ... then copy entire thing from CPU 0... */
1029         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1030                 ret = -EFAULT;
1031                 goto free_counters;
1032         }
1033
1034         /* FIXME: use iterator macros --RR */
1035         /* ... then go back and fix counters and names */
1036         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1037                 unsigned int i;
1038                 struct ipt_entry_match *m;
1039                 struct ipt_entry_target *t;
1040
1041                 e = (struct ipt_entry *)(table->private->entries + off);
1042                 if (copy_to_user(userptr + off
1043                                  + offsetof(struct ipt_entry, counters),
1044                                  &counters[num],
1045                                  sizeof(counters[num])) != 0) {
1046                         ret = -EFAULT;
1047                         goto free_counters;
1048                 }
1049
1050                 for (i = sizeof(struct ipt_entry);
1051                      i < e->target_offset;
1052                      i += m->u.match_size) {
1053                         m = (void *)e + i;
1054
1055                         if (copy_to_user(userptr + off + i
1056                                          + offsetof(struct ipt_entry_match,
1057                                                     u.user.name),
1058                                          m->u.kernel.match->name,
1059                                          strlen(m->u.kernel.match->name)+1)
1060                             != 0) {
1061                                 ret = -EFAULT;
1062                                 goto free_counters;
1063                         }
1064                 }
1065
1066                 t = ipt_get_target(e);
1067                 if (copy_to_user(userptr + off + e->target_offset
1068                                  + offsetof(struct ipt_entry_target,
1069                                             u.user.name),
1070                                  t->u.kernel.target->name,
1071                                  strlen(t->u.kernel.target->name)+1) != 0) {
1072                         ret = -EFAULT;
1073                         goto free_counters;
1074                 }
1075         }
1076
1077  free_counters:
1078         vfree(counters);
1079         return ret;
1080 }
1081
1082 static int
1083 get_entries(const struct ipt_get_entries *entries,
1084             struct ipt_get_entries __user *uptr)
1085 {
1086         int ret;
1087         struct ipt_table *t;
1088
1089         t = find_table_lock(entries->name);
1090         if (t && !IS_ERR(t)) {
1091                 duprintf("t->private->number = %u\n",
1092                          t->private->number);
1093                 if (entries->size == t->private->size)
1094                         ret = copy_entries_to_user(t->private->size,
1095                                                    t, uptr->entrytable);
1096                 else {
1097                         duprintf("get_entries: I've got %u not %u!\n",
1098                                  t->private->size,
1099                                  entries->size);
1100                         ret = -EINVAL;
1101                 }
1102                 module_put(t->me);
1103                 up(&ipt_mutex);
1104         } else
1105                 ret = t ? PTR_ERR(t) : -ENOENT;
1106
1107         return ret;
1108 }
1109
1110 static int
1111 do_replace(void __user *user, unsigned int len)
1112 {
1113         int ret;
1114         struct ipt_replace tmp;
1115         struct ipt_table *t;
1116         struct ipt_table_info *newinfo, *oldinfo;
1117         struct ipt_counters *counters;
1118
1119         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1120                 return -EFAULT;
1121
1122         /* Hack: Causes ipchains to give correct error msg --RR */
1123         if (len != sizeof(tmp) + tmp.size)
1124                 return -ENOPROTOOPT;
1125
1126         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1127         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1128                 return -ENOMEM;
1129
1130         newinfo = vmalloc(sizeof(struct ipt_table_info)
1131                           + SMP_ALIGN(tmp.size) * num_possible_cpus());
1132         if (!newinfo)
1133                 return -ENOMEM;
1134
1135         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1136                            tmp.size) != 0) {
1137                 ret = -EFAULT;
1138                 goto free_newinfo;
1139         }
1140
1141         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1142         if (!counters) {
1143                 ret = -ENOMEM;
1144                 goto free_newinfo;
1145         }
1146         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1147
1148         ret = translate_table(tmp.name, tmp.valid_hooks,
1149                               newinfo, tmp.size, tmp.num_entries,
1150                               tmp.hook_entry, tmp.underflow);
1151         if (ret != 0)
1152                 goto free_newinfo_counters;
1153
1154         duprintf("ip_tables: Translated table\n");
1155
1156         t = try_then_request_module(find_table_lock(tmp.name),
1157                                     "iptable_%s", tmp.name);
1158         if (!t || IS_ERR(t)) {
1159                 ret = t ? PTR_ERR(t) : -ENOENT;
1160                 goto free_newinfo_counters_untrans;
1161         }
1162
1163         /* You lied! */
1164         if (tmp.valid_hooks != t->valid_hooks) {
1165                 duprintf("Valid hook crap: %08X vs %08X\n",
1166                          tmp.valid_hooks, t->valid_hooks);
1167                 ret = -EINVAL;
1168                 goto put_module;
1169         }
1170
1171         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1172         if (!oldinfo)
1173                 goto put_module;
1174
1175         /* Update module usage count based on number of rules */
1176         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1177                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1178         if ((oldinfo->number > oldinfo->initial_entries) || 
1179             (newinfo->number <= oldinfo->initial_entries)) 
1180                 module_put(t->me);
1181         if ((oldinfo->number > oldinfo->initial_entries) &&
1182             (newinfo->number <= oldinfo->initial_entries))
1183                 module_put(t->me);
1184
1185         /* Get the old counters. */
1186         get_counters(oldinfo, counters);
1187         /* Decrease module usage counts and free resource */
1188         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1189         vfree(oldinfo);
1190         if (copy_to_user(tmp.counters, counters,
1191                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1192                 ret = -EFAULT;
1193         vfree(counters);
1194         up(&ipt_mutex);
1195         return ret;
1196
1197  put_module:
1198         module_put(t->me);
1199         up(&ipt_mutex);
1200  free_newinfo_counters_untrans:
1201         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1202  free_newinfo_counters:
1203         vfree(counters);
1204  free_newinfo:
1205         vfree(newinfo);
1206         return ret;
1207 }
1208
1209 /* We're lazy, and add to the first CPU; overflow works its fey magic
1210  * and everything is OK. */
1211 static inline int
1212 add_counter_to_entry(struct ipt_entry *e,
1213                      const struct ipt_counters addme[],
1214                      unsigned int *i)
1215 {
1216 #if 0
1217         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1218                  *i,
1219                  (long unsigned int)e->counters.pcnt,
1220                  (long unsigned int)e->counters.bcnt,
1221                  (long unsigned int)addme[*i].pcnt,
1222                  (long unsigned int)addme[*i].bcnt);
1223 #endif
1224
1225         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1226
1227         (*i)++;
1228         return 0;
1229 }
1230
1231 static int
1232 do_add_counters(void __user *user, unsigned int len)
1233 {
1234         unsigned int i;
1235         struct ipt_counters_info tmp, *paddc;
1236         struct ipt_table *t;
1237         int ret = 0;
1238
1239         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1240                 return -EFAULT;
1241
1242         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1243                 return -EINVAL;
1244
1245         paddc = vmalloc(len);
1246         if (!paddc)
1247                 return -ENOMEM;
1248
1249         if (copy_from_user(paddc, user, len) != 0) {
1250                 ret = -EFAULT;
1251                 goto free;
1252         }
1253
1254         t = find_table_lock(tmp.name);
1255         if (!t || IS_ERR(t)) {
1256                 ret = t ? PTR_ERR(t) : -ENOENT;
1257                 goto free;
1258         }
1259
1260         write_lock_bh(&t->lock);
1261         if (t->private->number != paddc->num_counters) {
1262                 ret = -EINVAL;
1263                 goto unlock_up_free;
1264         }
1265
1266         i = 0;
1267         IPT_ENTRY_ITERATE(t->private->entries,
1268                           t->private->size,
1269                           add_counter_to_entry,
1270                           paddc->counters,
1271                           &i);
1272  unlock_up_free:
1273         write_unlock_bh(&t->lock);
1274         up(&ipt_mutex);
1275         module_put(t->me);
1276  free:
1277         vfree(paddc);
1278
1279         return ret;
1280 }
1281
1282 static int
1283 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1284 {
1285         int ret;
1286
1287         if (!capable(CAP_NET_ADMIN))
1288                 return -EPERM;
1289
1290         switch (cmd) {
1291         case IPT_SO_SET_REPLACE:
1292                 ret = do_replace(user, len);
1293                 break;
1294
1295         case IPT_SO_SET_ADD_COUNTERS:
1296                 ret = do_add_counters(user, len);
1297                 break;
1298
1299         default:
1300                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1301                 ret = -EINVAL;
1302         }
1303
1304         return ret;
1305 }
1306
1307 static int
1308 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1309 {
1310         int ret;
1311
1312         if (!capable(CAP_NET_ADMIN))
1313                 return -EPERM;
1314
1315         switch (cmd) {
1316         case IPT_SO_GET_INFO: {
1317                 char name[IPT_TABLE_MAXNAMELEN];
1318                 struct ipt_table *t;
1319
1320                 if (*len != sizeof(struct ipt_getinfo)) {
1321                         duprintf("length %u != %u\n", *len,
1322                                  sizeof(struct ipt_getinfo));
1323                         ret = -EINVAL;
1324                         break;
1325                 }
1326
1327                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1328                         ret = -EFAULT;
1329                         break;
1330                 }
1331                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1332
1333                 t = try_then_request_module(find_table_lock(name),
1334                                             "iptable_%s", name);
1335                 if (t && !IS_ERR(t)) {
1336                         struct ipt_getinfo info;
1337
1338                         info.valid_hooks = t->valid_hooks;
1339                         memcpy(info.hook_entry, t->private->hook_entry,
1340                                sizeof(info.hook_entry));
1341                         memcpy(info.underflow, t->private->underflow,
1342                                sizeof(info.underflow));
1343                         info.num_entries = t->private->number;
1344                         info.size = t->private->size;
1345                         memcpy(info.name, name, sizeof(info.name));
1346
1347                         if (copy_to_user(user, &info, *len) != 0)
1348                                 ret = -EFAULT;
1349                         else
1350                                 ret = 0;
1351                         up(&ipt_mutex);
1352                         module_put(t->me);
1353                 } else
1354                         ret = t ? PTR_ERR(t) : -ENOENT;
1355         }
1356         break;
1357
1358         case IPT_SO_GET_ENTRIES: {
1359                 struct ipt_get_entries get;
1360
1361                 if (*len < sizeof(get)) {
1362                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1363                         ret = -EINVAL;
1364                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1365                         ret = -EFAULT;
1366                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1367                         duprintf("get_entries: %u != %u\n", *len,
1368                                  sizeof(struct ipt_get_entries) + get.size);
1369                         ret = -EINVAL;
1370                 } else
1371                         ret = get_entries(&get, user);
1372                 break;
1373         }
1374
1375         case IPT_SO_GET_REVISION_MATCH:
1376         case IPT_SO_GET_REVISION_TARGET: {
1377                 struct ipt_get_revision rev;
1378                 int (*revfn)(const char *, u8, int *);
1379
1380                 if (*len != sizeof(rev)) {
1381                         ret = -EINVAL;
1382                         break;
1383                 }
1384                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1385                         ret = -EFAULT;
1386                         break;
1387                 }
1388
1389                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1390                         revfn = target_revfn;
1391                 else
1392                         revfn = match_revfn;
1393
1394                 try_then_request_module(find_revision(rev.name, rev.revision,
1395                                                       revfn, &ret),
1396                                         "ipt_%s", rev.name);
1397                 break;
1398         }
1399
1400         default:
1401                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1402                 ret = -EINVAL;
1403         }
1404
1405         return ret;
1406 }
1407
1408 /* Registration hooks for targets. */
1409 int
1410 ipt_register_target(struct ipt_target *target)
1411 {
1412         int ret;
1413
1414         ret = down_interruptible(&ipt_mutex);
1415         if (ret != 0)
1416                 return ret;
1417         list_add(&target->list, &ipt_target);
1418         up(&ipt_mutex);
1419         return ret;
1420 }
1421
1422 void
1423 ipt_unregister_target(struct ipt_target *target)
1424 {
1425         down(&ipt_mutex);
1426         LIST_DELETE(&ipt_target, target);
1427         up(&ipt_mutex);
1428 }
1429
1430 int
1431 ipt_register_match(struct ipt_match *match)
1432 {
1433         int ret;
1434
1435         ret = down_interruptible(&ipt_mutex);
1436         if (ret != 0)
1437                 return ret;
1438
1439         list_add(&match->list, &ipt_match);
1440         up(&ipt_mutex);
1441
1442         return ret;
1443 }
1444
1445 void
1446 ipt_unregister_match(struct ipt_match *match)
1447 {
1448         down(&ipt_mutex);
1449         LIST_DELETE(&ipt_match, match);
1450         up(&ipt_mutex);
1451 }
1452
1453 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1454 {
1455         int ret;
1456         struct ipt_table_info *newinfo;
1457         static struct ipt_table_info bootstrap
1458                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1459
1460         newinfo = vmalloc(sizeof(struct ipt_table_info)
1461                           + SMP_ALIGN(repl->size) * num_possible_cpus());
1462         if (!newinfo)
1463                 return -ENOMEM;
1464
1465         memcpy(newinfo->entries, repl->entries, repl->size);
1466
1467         ret = translate_table(table->name, table->valid_hooks,
1468                               newinfo, repl->size,
1469                               repl->num_entries,
1470                               repl->hook_entry,
1471                               repl->underflow);
1472         if (ret != 0) {
1473                 vfree(newinfo);
1474                 return ret;
1475         }
1476
1477         ret = down_interruptible(&ipt_mutex);
1478         if (ret != 0) {
1479                 vfree(newinfo);
1480                 return ret;
1481         }
1482
1483         /* Don't autoload: we'd eat our tail... */
1484         if (list_named_find(&ipt_tables, table->name)) {
1485                 ret = -EEXIST;
1486                 goto free_unlock;
1487         }
1488
1489         /* Simplifies replace_table code. */
1490         table->private = &bootstrap;
1491         if (!replace_table(table, 0, newinfo, &ret))
1492                 goto free_unlock;
1493
1494         duprintf("table->private->number = %u\n",
1495                  table->private->number);
1496         
1497         /* save number of initial entries */
1498         table->private->initial_entries = table->private->number;
1499
1500         rwlock_init(&table->lock);
1501         list_prepend(&ipt_tables, table);
1502
1503  unlock:
1504         up(&ipt_mutex);
1505         return ret;
1506
1507  free_unlock:
1508         vfree(newinfo);
1509         goto unlock;
1510 }
1511
1512 void ipt_unregister_table(struct ipt_table *table)
1513 {
1514         down(&ipt_mutex);
1515         LIST_DELETE(&ipt_tables, table);
1516         up(&ipt_mutex);
1517
1518         /* Decrease module usage counts and free resources */
1519         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1520                           cleanup_entry, NULL);
1521         vfree(table->private);
1522 }
1523
/* Range test with optional inversion: the result is "min <= port <= max"
   XORed with @invert, i.e. 1 for a match and 0 otherwise when @invert
   is 0 or 1. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port && port <= max);

	return in_range ^ invert;
}
1533
1534 static int
1535 tcp_find_option(u_int8_t option,
1536                 const struct sk_buff *skb,
1537                 unsigned int optlen,
1538                 int invert,
1539                 int *hotdrop)
1540 {
1541         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1542         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1543         unsigned int i;
1544
1545         duprintf("tcp_match: finding option\n");
1546
1547         if (!optlen)
1548                 return invert;
1549
1550         /* If we don't have the whole header, drop packet. */
1551         op = skb_header_pointer(skb,
1552                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1553                                 optlen, _opt);
1554         if (op == NULL) {
1555                 *hotdrop = 1;
1556                 return 0;
1557         }
1558
1559         for (i = 0; i < optlen; ) {
1560                 if (op[i] == option) return !invert;
1561                 if (op[i] < 2) i++;
1562                 else i += op[i+1]?:1;
1563         }
1564
1565         return invert;
1566 }
1567
1568 static int
1569 tcp_match(const struct sk_buff *skb,
1570           const struct net_device *in,
1571           const struct net_device *out,
1572           const void *matchinfo,
1573           int offset,
1574           int *hotdrop)
1575 {
1576         struct tcphdr _tcph, *th;
1577         const struct ipt_tcp *tcpinfo = matchinfo;
1578
1579         if (offset) {
1580                 /* To quote Alan:
1581
1582                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1583                    causes this. Its a cracker trying to break in by doing a
1584                    flag overwrite to pass the direction checks.
1585                 */
1586                 if (offset == 1) {
1587                         duprintf("Dropping evil TCP offset=1 frag.\n");
1588                         *hotdrop = 1;
1589                 }
1590                 /* Must not be a fragment. */
1591                 return 0;
1592         }
1593
1594 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1595
1596         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1597                                 sizeof(_tcph), &_tcph);
1598         if (th == NULL) {
1599                 /* We've been asked to examine this packet, and we
1600                    can't.  Hence, no choice but to drop. */
1601                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1602                 *hotdrop = 1;
1603                 return 0;
1604         }
1605
1606         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1607                         ntohs(th->source),
1608                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1609                 return 0;
1610         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1611                         ntohs(th->dest),
1612                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1613                 return 0;
1614         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1615                       == tcpinfo->flg_cmp,
1616                       IPT_TCP_INV_FLAGS))
1617                 return 0;
1618         if (tcpinfo->option) {
1619                 if (th->doff * 4 < sizeof(_tcph)) {
1620                         *hotdrop = 1;
1621                         return 0;
1622                 }
1623                 if (!tcp_find_option(tcpinfo->option, skb,
1624                                      th->doff*4 - sizeof(_tcph),
1625                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1626                                      hotdrop))
1627                         return 0;
1628         }
1629         return 1;
1630 }
1631
1632 /* Called when user tries to insert an entry of this type. */
1633 static int
1634 tcp_checkentry(const char *tablename,
1635                const struct ipt_ip *ip,
1636                void *matchinfo,
1637                unsigned int matchsize,
1638                unsigned int hook_mask)
1639 {
1640         const struct ipt_tcp *tcpinfo = matchinfo;
1641
1642         /* Must specify proto == TCP, and no unknown invflags */
1643         return ip->proto == IPPROTO_TCP
1644                 && !(ip->invflags & IPT_INV_PROTO)
1645                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1646                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1647 }
1648
1649 static int
1650 udp_match(const struct sk_buff *skb,
1651           const struct net_device *in,
1652           const struct net_device *out,
1653           const void *matchinfo,
1654           int offset,
1655           int *hotdrop)
1656 {
1657         struct udphdr _udph, *uh;
1658         const struct ipt_udp *udpinfo = matchinfo;
1659
1660         /* Must not be a fragment. */
1661         if (offset)
1662                 return 0;
1663
1664         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1665                                 sizeof(_udph), &_udph);
1666         if (uh == NULL) {
1667                 /* We've been asked to examine this packet, and we
1668                    can't.  Hence, no choice but to drop. */
1669                 duprintf("Dropping evil UDP tinygram.\n");
1670                 *hotdrop = 1;
1671                 return 0;
1672         }
1673
1674         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1675                           ntohs(uh->source),
1676                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1677                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1678                               ntohs(uh->dest),
1679                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1680 }
1681
1682 /* Called when user tries to insert an entry of this type. */
1683 static int
1684 udp_checkentry(const char *tablename,
1685                const struct ipt_ip *ip,
1686                void *matchinfo,
1687                unsigned int matchinfosize,
1688                unsigned int hook_mask)
1689 {
1690         const struct ipt_udp *udpinfo = matchinfo;
1691
1692         /* Must specify proto == UDP, and no unknown invflags */
1693         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1694                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1695                          IPPROTO_UDP);
1696                 return 0;
1697         }
1698         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1699                 duprintf("ipt_udp: matchsize %u != %u\n",
1700                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1701                 return 0;
1702         }
1703         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1704                 duprintf("ipt_udp: unknown flags %X\n",
1705                          udpinfo->invflags);
1706                 return 0;
1707         }
1708
1709         return 1;
1710 }
1711
/*
 * Test an ICMP type/code pair against a rule.
 *
 * A test_type of 0xFF matches any type and code.  Otherwise the packet's
 * type must equal test_type and its code must lie in [min_code, max_code].
 * The result is XORed with invert before being returned.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else if (type != test_type)
		hit = 0;
	else
		hit = (min_code <= code && code <= max_code);

	return hit ^ invert;
}
1721
1722 static int
1723 icmp_match(const struct sk_buff *skb,
1724            const struct net_device *in,
1725            const struct net_device *out,
1726            const void *matchinfo,
1727            int offset,
1728            int *hotdrop)
1729 {
1730         struct icmphdr _icmph, *ic;
1731         const struct ipt_icmp *icmpinfo = matchinfo;
1732
1733         /* Must not be a fragment. */
1734         if (offset)
1735                 return 0;
1736
1737         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1738                                 sizeof(_icmph), &_icmph);
1739         if (ic == NULL) {
1740                 /* We've been asked to examine this packet, and we
1741                  * can't.  Hence, no choice but to drop.
1742                  */
1743                 duprintf("Dropping evil ICMP tinygram.\n");
1744                 *hotdrop = 1;
1745                 return 0;
1746         }
1747
1748         return icmp_type_code_match(icmpinfo->type,
1749                                     icmpinfo->code[0],
1750                                     icmpinfo->code[1],
1751                                     ic->type, ic->code,
1752                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1753 }
1754
1755 /* Called when user tries to insert an entry of this type. */
1756 static int
1757 icmp_checkentry(const char *tablename,
1758            const struct ipt_ip *ip,
1759            void *matchinfo,
1760            unsigned int matchsize,
1761            unsigned int hook_mask)
1762 {
1763         const struct ipt_icmp *icmpinfo = matchinfo;
1764
1765         /* Must specify proto == ICMP, and no unknown invflags */
1766         return ip->proto == IPPROTO_ICMP
1767                 && !(ip->invflags & IPT_INV_PROTO)
1768                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1769                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1770 }
1771
1772 /* The built-in targets: standard (NULL) and error. */
1773 static struct ipt_target ipt_standard_target = {
1774         .name           = IPT_STANDARD_TARGET,
1775 };
1776
1777 static struct ipt_target ipt_error_target = {
1778         .name           = IPT_ERROR_TARGET,
1779         .target         = ipt_error,
1780 };
1781
1782 static struct nf_sockopt_ops ipt_sockopts = {
1783         .pf             = PF_INET,
1784         .set_optmin     = IPT_BASE_CTL,
1785         .set_optmax     = IPT_SO_SET_MAX+1,
1786         .set            = do_ipt_set_ctl,
1787         .get_optmin     = IPT_BASE_CTL,
1788         .get_optmax     = IPT_SO_GET_MAX+1,
1789         .get            = do_ipt_get_ctl,
1790 };
1791
1792 static struct ipt_match tcp_matchstruct = {
1793         .name           = "tcp",
1794         .match          = &tcp_match,
1795         .checkentry     = &tcp_checkentry,
1796 };
1797
1798 static struct ipt_match udp_matchstruct = {
1799         .name           = "udp",
1800         .match          = &udp_match,
1801         .checkentry     = &udp_checkentry,
1802 };
1803
1804 static struct ipt_match icmp_matchstruct = {
1805         .name           = "icmp",
1806         .match          = &icmp_match,
1807         .checkentry     = &icmp_checkentry,
1808 };
1809
1810 #ifdef CONFIG_PROC_FS
1811 static inline int print_name(const char *i,
1812                              off_t start_offset, char *buffer, int length,
1813                              off_t *pos, unsigned int *count)
1814 {
1815         if ((*count)++ >= start_offset) {
1816                 unsigned int namelen;
1817
1818                 namelen = sprintf(buffer + *pos, "%s\n",
1819                                   i + sizeof(struct list_head));
1820                 if (*pos + namelen > length) {
1821                         /* Stop iterating */
1822                         return 1;
1823                 }
1824                 *pos += namelen;
1825         }
1826         return 0;
1827 }
1828
1829 static inline int print_target(const struct ipt_target *t,
1830                                off_t start_offset, char *buffer, int length,
1831                                off_t *pos, unsigned int *count)
1832 {
1833         if (t == &ipt_standard_target || t == &ipt_error_target)
1834                 return 0;
1835         return print_name((char *)t, start_offset, buffer, length, pos, count);
1836 }
1837
1838 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1839 {
1840         off_t pos = 0;
1841         unsigned int count = 0;
1842
1843         if (down_interruptible(&ipt_mutex) != 0)
1844                 return 0;
1845
1846         LIST_FIND(&ipt_tables, print_name, void *,
1847                   offset, buffer, length, &pos, &count);
1848
1849         up(&ipt_mutex);
1850
1851         /* `start' hack - see fs/proc/generic.c line ~105 */
1852         *start=(char *)((unsigned long)count-offset);
1853         return pos;
1854 }
1855
1856 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1857 {
1858         off_t pos = 0;
1859         unsigned int count = 0;
1860
1861         if (down_interruptible(&ipt_mutex) != 0)
1862                 return 0;
1863
1864         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1865                   offset, buffer, length, &pos, &count);
1866         
1867         up(&ipt_mutex);
1868
1869         *start = (char *)((unsigned long)count - offset);
1870         return pos;
1871 }
1872
1873 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1874 {
1875         off_t pos = 0;
1876         unsigned int count = 0;
1877
1878         if (down_interruptible(&ipt_mutex) != 0)
1879                 return 0;
1880         
1881         LIST_FIND(&ipt_match, print_name, void *,
1882                   offset, buffer, length, &pos, &count);
1883
1884         up(&ipt_mutex);
1885
1886         *start = (char *)((unsigned long)count - offset);
1887         return pos;
1888 }
1889
1890 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1891 { { "ip_tables_names", ipt_get_tables },
1892   { "ip_tables_targets", ipt_get_targets },
1893   { "ip_tables_matches", ipt_get_matches },
1894   { NULL, NULL} };
1895 #endif /*CONFIG_PROC_FS*/
1896
1897 static int __init init(void)
1898 {
1899         int ret;
1900
1901         /* Noone else will be downing sem now, so we won't sleep */
1902         down(&ipt_mutex);
1903         list_append(&ipt_target, &ipt_standard_target);
1904         list_append(&ipt_target, &ipt_error_target);
1905         list_append(&ipt_match, &tcp_matchstruct);
1906         list_append(&ipt_match, &udp_matchstruct);
1907         list_append(&ipt_match, &icmp_matchstruct);
1908         up(&ipt_mutex);
1909
1910         /* Register setsockopt */
1911         ret = nf_register_sockopt(&ipt_sockopts);
1912         if (ret < 0) {
1913                 duprintf("Unable to register sockopts.\n");
1914                 return ret;
1915         }
1916
1917 #ifdef CONFIG_PROC_FS
1918         {
1919         struct proc_dir_entry *proc;
1920         int i;
1921
1922         for (i = 0; ipt_proc_entry[i].name; i++) {
1923                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1924                                        ipt_proc_entry[i].get_info);
1925                 if (!proc) {
1926                         while (--i >= 0)
1927                                 proc_net_remove(ipt_proc_entry[i].name);
1928                         nf_unregister_sockopt(&ipt_sockopts);
1929                         return -ENOMEM;
1930                 }
1931                 proc->owner = THIS_MODULE;
1932         }
1933         }
1934 #endif
1935
1936         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1937         return 0;
1938 }
1939
1940 static void __exit fini(void)
1941 {
1942         nf_unregister_sockopt(&ipt_sockopts);
1943 #ifdef CONFIG_PROC_FS
1944         {
1945         int i;
1946         for (i = 0; ipt_proc_entry[i].name; i++)
1947                 proc_net_remove(ipt_proc_entry[i].name);
1948         }
1949 #endif
1950 }
1951
/* Table registration and traversal. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Match extensions. */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Target extensions. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);