/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *      - new API and handling of conntrack/nat helpers
 *      - now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *      - add usage/reference counts to ip_conntrack_expect
 *      - export ip_conntrack[_expect]_{find_get,put} functions
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>

/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION    "2.1"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DEFINE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
static kmem_cache_t *ip_conntrack_expect_cachep;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

void
ip_conntrack_put(struct ip_conntrack *ct)
{
        IP_NF_ASSERT(ct);
        nf_conntrack_put(&ct->ct_general);
}

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

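/* Hash a tuple into the conntrack table.  jhash mixes the source IP,
 * the destination IP xor'd with the protocol number, and both ports
 * packed into a single word, seeded with the boot-time random value
 * ip_conntrack_hash_rnd (presumably so remote hosts cannot construct
 * deliberate hash collisions); the result is reduced modulo the table
 * size. */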
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
#if 0
        dump_tuple(tuple);
#endif
        return (jhash_3words(tuple->src.ip,
                             (tuple->dst.ip ^ tuple->dst.protonum),
                             (tuple->src.u.all | (tuple->dst.u.all << 16)),
                             ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
}

int
ip_ct_get_tuple(const struct iphdr *iph,
                const struct sk_buff *skb,
                unsigned int dataoff,
                struct ip_conntrack_tuple *tuple,
                const struct ip_conntrack_protocol *protocol)
{
        /* Never happens: callers defragment first. */
        if (iph->frag_off & htons(IP_OFFSET)) {
                printk("ip_conntrack_core: Frag of proto %u.\n",
                       iph->protocol);
                return 0;
        }

        tuple->src.ip = iph->saddr;
        tuple->dst.ip = iph->daddr;
        tuple->dst.protonum = iph->protocol;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;

        return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig,
                   const struct ip_conntrack_protocol *protocol)
{
        inverse->src.ip = orig->dst.ip;
        inverse->dst.ip = orig->src.ip;
        inverse->dst.protonum = orig->dst.protonum;
        inverse->dst.dir = !orig->dst.dir;

        return protocol->invert_tuple(inverse, orig);
}


/* ip_conntrack_expect helper functions */
static void unlink_expect(struct ip_conntrack_expect *exp)
{
        ASSERT_WRITE_LOCK(&ip_conntrack_lock);
        IP_NF_ASSERT(!timer_pending(&exp->timeout));
        list_del(&exp->list);
        CONNTRACK_STAT_INC(expect_delete);
        exp->master->expecting--;
}

static void expectation_timed_out(unsigned long ul_expect)
{
        struct ip_conntrack_expect *exp = (void *)ul_expect;

        write_lock_bh(&ip_conntrack_lock);
        unlink_expect(exp);
        write_unlock_bh(&ip_conntrack_lock);
        ip_conntrack_expect_put(exp);
}

/* If an expectation for this connection is found, it gets deleted from
 * the global list and then returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                /* If the master is not in the hash table yet (ie. the packet
                   hasn't left this machine yet), how could the other end know
                   about the expectation?  Hence these are not the droids you
                   are looking for (if the master ct never got confirmed, we'd
                   hold a reference to it and weird things would happen to
                   future packets). */
                if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
                    && is_confirmed(i->master)
                    && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        return i;
                }
        }
        return NULL;
}

/* delete all expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
        struct ip_conntrack_expect *i, *tmp;

        /* Optimization: most connections never expect any others. */
        if (ct->expecting == 0)
                return;

        list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
                if (i->master == ct && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        ip_conntrack_expect_put(i);
                }
        }
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
        unsigned int ho, hr;

        DEBUGP("clean_from_lists(%p)\n", ct);
        ASSERT_WRITE_LOCK(&ip_conntrack_lock);

        ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* Destroy all pending expectations */
        remove_expectations(ct);
}

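/* Final destructor, reached via ct_general.destroy once the last
 * reference has been dropped: lets the L4 protocol clean up, runs the
 * ip_conntrack_destroyed callback, removes any expectations still
 * owned by this conntrack, unlinks an unconfirmed entry from the
 * unconfirmed list, releases the reference on the master conntrack
 * and finally returns the object to the slab cache. */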
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
        struct ip_conntrack_protocol *proto;

        DEBUGP("destroy_conntrack(%p)\n", ct);
        IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
        IP_NF_ASSERT(!timer_pending(&ct->timeout));

        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to ip_conntrack_lock!!! -HW */
        proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);

        if (ip_conntrack_destroyed)
                ip_conntrack_destroyed(ct);

        write_lock_bh(&ip_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
         * except TFTP can create an expectation on the first packet,
         * before connection is in the list, so we need to clean here,
         * too. */
        remove_expectations(ct);

        /* We overload the first tuple to link into the unconfirmed list. */
        if (!is_confirmed(ct)) {
                BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        }

        CONNTRACK_STAT_INC(delete);
        write_unlock_bh(&ip_conntrack_lock);

        if (ct->master)
                ip_conntrack_put(ct->master);

        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        kmem_cache_free(ip_conntrack_cachep, ct);
        atomic_dec(&ip_conntrack_count);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
        struct ip_conntrack *ct = (void *)ul_conntrack;

        write_lock_bh(&ip_conntrack_lock);
        /* Inside lock so preempt is disabled on module removal path.
         * Otherwise we can get spurious warnings. */
        CONNTRACK_STAT_INC(delete_list);
        clean_from_lists(ct);
        write_unlock_bh(&ip_conntrack_lock);
        ip_conntrack_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
                    const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        ASSERT_READ_LOCK(&ip_conntrack_lock);
        return tuplehash_to_ctrack(i) != ignored_conntrack
                && ip_ct_tuple_equal(tuple, &i->tuple);
}

static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);

        ASSERT_READ_LOCK(&ip_conntrack_lock);
        list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
                if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
                        CONNTRACK_STAT_INC(found);
                        return h;
                }
                CONNTRACK_STAT_INC(searched);
        }

        return NULL;
}

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
                      const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        read_lock_bh(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        read_unlock_bh(&ip_conntrack_lock);

        return h;
}

/* Confirm a connection given skb; places it in hash table */
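/* Note: confirmation runs for the first packet of a connection only,
 * and (judging from the hook registrations elsewhere) at the last
 * netfilter hook the packet traverses, i.e. once we know it will not
 * be dropped.  Only here does the conntrack move from the unconfirmed
 * list into the hash table, and only here does its timeout timer
 * start running. */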
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
        unsigned int hash, repl_hash;
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        ct = ip_conntrack_get(*pskb, &ctinfo);

        /* ipt_REJECT uses ip_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction.  Actual packet
           which created connection will be IP_CT_NEW or for an
           expected connection, IP_CT_RELATED. */
        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
                return NF_ACCEPT;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        /* We're not in hash table, and we refuse to set up related
           connections for unconfirmed conns.  But packet copies and
           REJECT will give spurious warnings here. */
        /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

        /* No external references means no one else could have
           confirmed us. */
        IP_NF_ASSERT(!is_confirmed(ct));
        DEBUGP("Confirming conntrack %p\n", ct);

        write_lock_bh(&ip_conntrack_lock);

        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost the race. */
        if (!LIST_FIND(&ip_conntrack_hash[hash],
                       conntrack_tuple_cmp,
                       struct ip_conntrack_tuple_hash *,
                       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
            && !LIST_FIND(&ip_conntrack_hash[repl_hash],
                          conntrack_tuple_cmp,
                          struct ip_conntrack_tuple_hash *,
                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
                /* Remove from unconfirmed list */
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

                list_prepend(&ip_conntrack_hash[hash],
                             &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
                list_prepend(&ip_conntrack_hash[repl_hash],
                             &ct->tuplehash[IP_CT_DIR_REPLY]);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                CONNTRACK_STAT_INC(insert);
                write_unlock_bh(&ip_conntrack_lock);
                return NF_ACCEPT;
        }

        CONNTRACK_STAT_INC(insert_failed);
        write_unlock_bh(&ip_conntrack_lock);

        return NF_DROP;
}

/* Returns true if a connection corresponding to the tuple exists
   (required for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
                         const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        read_lock_bh(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        read_unlock_bh(&ip_conntrack_lock);

        return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
        return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}

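/* Called when the table is full: try to make room by dropping the
 * oldest unassured entry (roughly LRU) in this hash chain.  We grab a
 * reference under the read lock, then race the timer for the kill; if
 * del_timer() succeeds we invoke the timeout handler by hand to
 * unhash and release the entry. */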
static int early_drop(struct list_head *chain)
{
        /* Traverse backwards: gives us the oldest, which is roughly LRU */
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct = NULL;
        int dropped = 0;

        read_lock_bh(&ip_conntrack_lock);
        h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
        if (h) {
                ct = tuplehash_to_ctrack(h);
                atomic_inc(&ct->ct_general.use);
        }
        read_unlock_bh(&ip_conntrack_lock);

        if (!ct)
                return dropped;

        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
                CONNTRACK_STAT_INC(early_drop);
        }
        ip_conntrack_put(ct);
        return dropped;
}

static inline int helper_cmp(const struct ip_conntrack_helper *i,
                             const struct ip_conntrack_tuple *rtuple)
{
        return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
        return LIST_FIND(&helpers, helper_cmp,
                         struct ip_conntrack_helper *,
                         tuple);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
               struct ip_conntrack_protocol *protocol,
               struct sk_buff *skb)
{
        struct ip_conntrack *conntrack;
        struct ip_conntrack_tuple repl_tuple;
        size_t hash;
        struct ip_conntrack_expect *exp;

        if (!ip_conntrack_hash_rnd_initted) {
                get_random_bytes(&ip_conntrack_hash_rnd, 4);
                ip_conntrack_hash_rnd_initted = 1;
        }

        hash = hash_conntrack(tuple);

        if (ip_conntrack_max
            && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
                /* Try dropping from this hash chain. */
                if (!early_drop(&ip_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "ip_conntrack: table full, dropping"
                                       " packet.\n");
                        return ERR_PTR(-ENOMEM);
                }
        }

        if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }

        conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
        if (!conntrack) {
                DEBUGP("Can't allocate conntrack.\n");
                return ERR_PTR(-ENOMEM);
        }

        memset(conntrack, 0, sizeof(*conntrack));
        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
        if (!protocol->new(conntrack, skb)) {
                kmem_cache_free(ip_conntrack_cachep, conntrack);
                return NULL;
        }
        /* Don't set timer yet: wait for confirmation */
        init_timer(&conntrack->timeout);
        conntrack->timeout.data = (unsigned long)conntrack;
        conntrack->timeout.function = death_by_timeout;

        write_lock_bh(&ip_conntrack_lock);
        exp = find_expectation(tuple);

        if (exp) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                        conntrack, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
                conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
                /* this is ugly, but there is no other place to put it */
                conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
                nf_conntrack_get(&conntrack->master->ct_general);
                CONNTRACK_STAT_INC(expect_new);
        } else {
                conntrack->helper = ip_ct_find_helper(&repl_tuple);

                CONNTRACK_STAT_INC(new);
        }

        /* Overload the tuple linked list to put us in the unconfirmed list. */
        list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

        atomic_inc(&ip_conntrack_count);
        write_unlock_bh(&ip_conntrack_lock);

        if (exp) {
                if (exp->expectfn)
                        exp->expectfn(conntrack, exp);
                ip_conntrack_expect_put(exp);
        }

        return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
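/* Extracts the tuple, looks it up in the hash table (creating a fresh
 * conntrack via init_conntrack() on a miss) and classifies the packet:
 * the reply direction gives ESTABLISHED + IS_REPLY; in the original
 * direction we report ESTABLISHED once a reply has been seen, RELATED
 * for expected connections, and NEW otherwise. */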
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
                  struct ip_conntrack_protocol *proto,
                  int *set_reply,
                  unsigned int hooknum,
                  enum ip_conntrack_info *ctinfo)
{
        struct ip_conntrack_tuple tuple;
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct;

        IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

        if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
                             &tuple, proto))
                return NULL;

        /* look for tuple match */
        h = ip_conntrack_find_get(&tuple, NULL);
        if (!h) {
                h = init_conntrack(&tuple, proto, skb);
                if (!h)
                        return NULL;
                if (IS_ERR(h))
                        return (void *)h;
        }
        ct = tuplehash_to_ctrack(h);

        /* It exists; we have (non-exclusive) reference. */
        if (DIRECTION(h) == IP_CT_DIR_REPLY) {
                *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
                /* Please set reply bit if this packet OK */
                *set_reply = 1;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: normal packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: related packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
                        DEBUGP("ip_conntrack_in: new packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
        }
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
}

/* Netfilter hook itself. */
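/* Overall flow: skip packets that already carry conntrack state,
 * refuse fragments (callers are expected to defragment first), give
 * the L4 protocol a chance to veto malformed packets via ->error(),
 * look up or create the conntrack, run the protocol's per-packet
 * state machine, and finally note that a reply was seen when the
 * packet travelled in the reply direction. */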
unsigned int ip_conntrack_in(unsigned int hooknum,
                             struct sk_buff **pskb,
                             const struct net_device *in,
                             const struct net_device *out,
                             int (*okfn)(struct sk_buff *))
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;
        struct ip_conntrack_protocol *proto;
        int set_reply;
        int ret;

        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
                CONNTRACK_STAT_INC(ignore);
                return NF_ACCEPT;
        }

        /* Never happens: fragments are reassembled before we are called. */
        if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
                if (net_ratelimit()) {
                        printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
                               (*pskb)->nh.iph->protocol, hooknum);
                }
                return NF_DROP;
        }

        /* FIXME: Do this right please. --RR */
        (*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
        /* Ignore broadcast: no `connection'. */
        if ((*pskb)->pkt_type == PACKET_BROADCAST) {
                printk("Broadcast packet!\n");
                return NF_ACCEPT;
        } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
                   == htonl(0x000000FF)) {
                printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
                       NIPQUAD((*pskb)->nh.iph->saddr),
                       NIPQUAD((*pskb)->nh.iph->daddr),
                       (*pskb)->sk, (*pskb)->pkt_type);
        }
#endif

        proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

        /* It may be a special packet, error, unclean...
         * the inverse of the return code tells the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL
            && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
                CONNTRACK_STAT_INC(error);
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
                                     &ctinfo))) {
                /* Not valid part of a connection */
                CONNTRACK_STAT_INC(invalid);
                return NF_ACCEPT;
        }

        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
                CONNTRACK_STAT_INC(drop);
                return NF_DROP;
        }

        IP_NF_ASSERT((*pskb)->nfct);

        ret = proto->packet(ct, *pskb, ctinfo);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (set_reply)
                set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

        return ret;
}

int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig)
{
        return ip_ct_invert_tuple(inverse, orig,
                                  ip_ct_find_proto(orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
                               const struct ip_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct ip_conntrack_tuple intersect_mask
                = { { a->mask.src.ip & b->mask.src.ip,
                      { a->mask.src.u.all & b->mask.src.u.all } },
                    { a->mask.dst.ip & b->mask.dst.ip,
                      { a->mask.dst.u.all & b->mask.dst.u.all },
                      a->mask.dst.protonum & b->mask.dst.protonum } };

        return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct ip_conntrack_expect *a,
                                 const struct ip_conntrack_expect *b)
{
        return a->master == b->master
                && ip_ct_tuple_equal(&a->tuple, &b->tuple)
                && ip_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
        struct ip_conntrack_expect *i;

        write_lock_bh(&ip_conntrack_lock);
        /* choose the oldest expectation to evict */
        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, exp) && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        write_unlock_bh(&ip_conntrack_lock);
                        ip_conntrack_expect_put(i);
                        return;
                }
        }
        write_unlock_bh(&ip_conntrack_lock);
}

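/* Allocate an expectation and pin its master conntrack: the reference
 * taken here is dropped again in ip_conntrack_expect_put() when the
 * use count reaches zero. */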
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
        struct ip_conntrack_expect *new;

        new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
        }
        new->master = me;
        atomic_inc(&new->master->ct_general.use);
        atomic_set(&new->use, 1);
        return new;
}

void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use)) {
                ip_conntrack_put(exp->master);
                kmem_cache_free(ip_conntrack_expect_cachep, exp);
        }
}

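/* Link a new expectation into the global list and arm its timeout,
 * which is derived from the helper that created it.  The reference
 * taken here belongs to the list/timer and is dropped once the
 * expectation is matched, evicted or times out. */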
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
        atomic_inc(&exp->use);
        exp->master->expecting++;
        list_add(&exp->list, &ip_conntrack_expect_list);

        init_timer(&exp->timeout);
        exp->timeout.data = (unsigned long)exp;
        exp->timeout.function = expectation_timed_out;
        exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
        add_timer(&exp->timeout);

        CONNTRACK_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (i->master == master) {
                        if (del_timer(&i->timeout)) {
                                unlink_expect(i);
                                ip_conntrack_expect_put(i);
                        }
                        break;
                }
        }
}

static inline int refresh_timer(struct ip_conntrack_expect *i)
{
        if (!del_timer(&i->timeout))
                return 0;

        i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
        add_timer(&i->timeout);
        return 1;
}

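/* Register an expectation on behalf of a helper.  An identical
 * expectation merely has its timer refreshed; a clashing (overlapping
 * but not identical) one is rejected with -EBUSY; and when the
 * helper's max_expected limit would be exceeded, the master's oldest
 * expectation is evicted to make room. */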
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
        struct ip_conntrack_expect *i;
        int ret;

        DEBUGP("ip_conntrack_expect_related %p\n", expect);
        DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
        DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

        write_lock_bh(&ip_conntrack_lock);
        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }

        /* Will we be over the limit? */
        if (expect->master->helper->max_expected &&
            expect->master->expecting >= expect->master->helper->max_expected)
                evict_oldest_expect(expect->master);

        ip_conntrack_expect_insert(expect);
        ret = 0;
out:
        write_unlock_bh(&ip_conntrack_lock);
        return ret;
}

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
                              const struct ip_conntrack_tuple *newreply)
{
        write_lock_bh(&ip_conntrack_lock);
        /* Should be unconfirmed, so not in hash table yet */
        IP_NF_ASSERT(!is_confirmed(conntrack));

        DEBUGP("Altering reply tuple of %p to ", conntrack);
        DUMP_TUPLE(newreply);

        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
        if (!conntrack->master && conntrack->expecting == 0)
                conntrack->helper = ip_ct_find_helper(newreply);
        write_unlock_bh(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
        BUG_ON(me->timeout == 0);
        write_lock_bh(&ip_conntrack_lock);
        list_prepend(&helpers, me);
        write_unlock_bh(&ip_conntrack_lock);

        return 0;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
                         const struct ip_conntrack_helper *me)
{
        if (tuplehash_to_ctrack(i)->helper == me)
                tuplehash_to_ctrack(i)->helper = NULL;
        return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
        unsigned int i;
        struct ip_conntrack_expect *exp, *tmp;

        /* Need write lock here, to delete helper. */
        write_lock_bh(&ip_conntrack_lock);
        LIST_DELETE(&helpers, me);

        /* Get rid of expectations */
        list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
                if (exp->master->helper == me && del_timer(&exp->timeout)) {
                        unlink_expect(exp);
                        ip_conntrack_expect_put(exp);
                }
        }
        /* Get rid of expecteds, set helpers to NULL. */
        LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
        for (i = 0; i < ip_conntrack_htable_size; i++)
                LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
                            struct ip_conntrack_tuple_hash *, me);
        write_unlock_bh(&ip_conntrack_lock);

        /* Someone could still be looking at the helper in a bh. */
        synchronize_net();
}

static inline void ct_add_counters(struct ip_conntrack *ct,
                                   enum ip_conntrack_info ctinfo,
                                   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
        if (skb) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
                                        ntohs(skb->nh.iph->tot_len);
        }
#endif
}

/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
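/* For an unconfirmed conntrack the timer is not running yet, so only
 * the relative timeout is stored; __ip_conntrack_confirm() later adds
 * jiffies and arms the timer.  A confirmed conntrack is bumped with
 * the del_timer()/add_timer() pair under the lock, to avoid racing
 * with death_by_timeout(). */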
void ip_ct_refresh_acct(struct ip_conntrack *ct,
                        enum ip_conntrack_info ctinfo,
                        const struct sk_buff *skb,
                        unsigned long extra_jiffies)
{
        IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

        /* If not in hash table, timer will not be active yet */
        if (!is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                ct_add_counters(ct, ctinfo, skb);
        } else {
                write_lock_bh(&ip_conntrack_lock);
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                }
                ct_add_counters(ct, ctinfo, skb);
                write_unlock_bh(&ip_conntrack_lock);
        }
}

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
        skb_orphan(skb);

        local_bh_disable();
        skb = ip_defrag(skb, user);
        local_bh_enable();

        if (skb) {
                ip_send_check(skb->nh.iph);
                skb->nfcache |= NFC_ALTERED;
        }
        return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        /* This ICMP is in reverse direction to the packet which caused it */
        ct = ip_conntrack_get(skb, &ctinfo);

        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;

        /* Attach to new skbuff, and increment count */
        nskb->nfct = &ct->ct_general;
        nskb->nfctinfo = ctinfo;
        nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
        int (*iter)(struct ip_conntrack *i, void *data),
        void *data)
{
        return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
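/* Scan the hash table (and the unconfirmed list) under the write lock
 * and return the first entry selected by the iterator callback, with
 * a reference held so the caller can kill it safely after the lock is
 * released.  *bucket preserves the scan position between calls. */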
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
                void *data, unsigned int *bucket)
{
        struct ip_conntrack_tuple_hash *h = NULL;

        write_lock_bh(&ip_conntrack_lock);
        for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
                h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
                if (h)
                        break;
        }
        if (!h)
                h = LIST_FIND_W(&unconfirmed, do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        write_unlock_bh(&ip_conntrack_lock);

        return h;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int bucket = 0;

        while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                ip_conntrack_put(ct);
        }
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack_tuple tuple;

        IP_CT_TUPLE_U_BLANK(&tuple);
        tuple.src.ip = inet->rcv_saddr;
        tuple.src.u.tcp.port = inet->sport;
        tuple.dst.ip = inet->daddr;
        tuple.dst.u.tcp.port = inet->dport;
        tuple.dst.protonum = IPPROTO_TCP;

        /* We only do TCP at the moment: is there a better way? */
        if (strcmp(sk->sk_prot->name, "TCP")) {
                DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
                return -ENOPROTOOPT;
        }

        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
                DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
                       *len, sizeof(struct sockaddr_in));
                return -EINVAL;
        }

        h = ip_conntrack_find_get(&tuple, NULL);
        if (h) {
                struct sockaddr_in sin;
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);

                sin.sin_family = AF_INET;
                sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.u.tcp.port;
                sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.ip;

                DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
                       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
                ip_conntrack_put(ct);
                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
                        return -EFAULT;
                else
                        return 0;
        }
        DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
               NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
               NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
        return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
        .pf             = PF_INET,
        .get_optmin     = SO_ORIGINAL_DST,
        .get_optmax     = SO_ORIGINAL_DST+1,
        .get            = &getorigdst,
};

static int kill_all(struct ip_conntrack *i, void *data)
{
        return 1;
}

static void free_conntrack_hash(void)
{
        if (ip_conntrack_vmalloc)
                vfree(ip_conntrack_hash);
        else
                free_pages((unsigned long)ip_conntrack_hash,
                           get_order(sizeof(struct list_head)
                                     * ip_conntrack_htable_size));
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
        ip_ct_attach = NULL;
        /* This makes sure all current packets have passed through
           the netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

 i_see_dead_people:
        ip_ct_iterate_cleanup(kill_all, NULL);
        if (atomic_read(&ip_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }
        /* wait until all references to ip_conntrack_untracked are dropped */
        while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
                schedule();

        kmem_cache_destroy(ip_conntrack_cachep);
        kmem_cache_destroy(ip_conntrack_expect_cachep);
        free_conntrack_hash();
        nf_unregister_sockopt(&so_getorigdst);
}

static int hashsize;
module_param(hashsize, int, 0400);

int __init ip_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: a 32MB
         * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
        if (hashsize) {
                ip_conntrack_htable_size = hashsize;
        } else {
                ip_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        ip_conntrack_htable_size = 8192;
                if (ip_conntrack_htable_size < 16)
                        ip_conntrack_htable_size = 16;
        }
        ip_conntrack_max = 8 * ip_conntrack_htable_size;
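        /* Worked example for the sizing above, assuming an 8-byte
         * struct list_head (32-bit): a 32MB machine gets 32MB/16384 =
         * 2048 bytes of table, i.e. 256 buckets and ip_conntrack_max =
         * 2048; at 1GB the computed 8192 buckets coincide with the
         * cap, giving ip_conntrack_max = 65536. */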

        printk("ip_conntrack version %s (%u buckets, %d max)"
               " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
               ip_conntrack_htable_size, ip_conntrack_max,
               sizeof(struct ip_conntrack));

        ret = nf_register_sockopt(&so_getorigdst);
        if (ret != 0) {
                printk(KERN_ERR "Unable to register netfilter socket option\n");
                return ret;
        }

        /* AK: the hash table is twice as big as needed because it
           uses list_head.  It would be much nicer for caches to use a
           single-pointer list head here. */
        ip_conntrack_vmalloc = 0;
        ip_conntrack_hash
                = (void *)__get_free_pages(GFP_KERNEL,
                                           get_order(sizeof(struct list_head)
                                                     * ip_conntrack_htable_size));
        if (!ip_conntrack_hash) {
                ip_conntrack_vmalloc = 1;
                printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
                ip_conntrack_hash = vmalloc(sizeof(struct list_head)
                                            * ip_conntrack_htable_size);
        }
        if (!ip_conntrack_hash) {
                printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
                goto err_unreg_sockopt;
        }

        ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
                                                sizeof(struct ip_conntrack), 0,
                                                0, NULL, NULL);
        if (!ip_conntrack_cachep) {
                printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
                goto err_free_hash;
        }

        ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
                                        sizeof(struct ip_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!ip_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create ip_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED the lock here, but good form anyway. */
        write_lock_bh(&ip_conntrack_lock);
        for (i = 0; i < MAX_IP_CT_PROTO; i++)
                ip_ct_protos[i] = &ip_conntrack_generic_protocol;
        /* Sew in builtin protocols. */
        ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
        ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
        ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
        write_unlock_bh(&ip_conntrack_lock);

        for (i = 0; i < ip_conntrack_htable_size; i++)
                INIT_LIST_HEAD(&ip_conntrack_hash[i]);

        /* For use by ipt_REJECT */
        ip_ct_attach = ip_conntrack_attach;

        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */
        atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
        /*  - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
        free_conntrack_hash();
err_unreg_sockopt:
        nf_unregister_sockopt(&so_getorigdst);

        return -ENOMEM;
}