/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>

/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION	"2.3"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DEFINE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep __read_mostly;
static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

static unsigned int ip_conntrack_next_id = 1;
static unsigned int ip_conntrack_expect_next_id = 1;

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
struct notifier_block *ip_conntrack_chain;
struct notifier_block *ip_conntrack_expect_chain;

DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
		notifier_call_chain(&ip_conntrack_chain, ecache->events,
				    ecache->ct);
	ecache->events = 0;
	ip_conntrack_put(ecache->ct);
	ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling or freeing the skb */
void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	if (ecache->ct == ct)
		__ip_ct_deliver_cached_events(ecache);
	local_bh_enable();
}

void __ip_ct_event_cache_init(struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__ip_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void ip_ct_event_cache_flush(void)
{
	struct ip_conntrack_ecache *ecache;
	int cpu;

	for_each_cpu(cpu) {
		ecache = &per_cpu(ip_conntrack_ecache, cpu);
		if (ecache->ct)
			ip_conntrack_put(ecache->ct);
	}
}
#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
static inline void ip_ct_event_cache_flush(void) {}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	return (jhash_3words(tuple->src.ip,
			     (tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
}

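/* Note on the hash above: the three 32-bit inputs are mixed with jhash and
 * a random seed chosen once, lazily, in ip_conntrack_alloc().  Without the
 * seed an off-host sender could predict bucket placement and degenerate one
 * chain into a long linear list; with it, bucket choice is not externally
 * computable. */
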
int
ip_ct_get_tuple(const struct iphdr *iph,
		const struct sk_buff *skb,
		unsigned int dataoff,
		struct ip_conntrack_tuple *tuple,
		const struct ip_conntrack_protocol *protocol)
{
	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig,
		   const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	return protocol->invert_tuple(inverse, orig);
}

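/* Worked example: an original TCP tuple 10.0.0.1:1024 -> 192.168.0.1:80
 * inverts to 192.168.0.1:80 -> 10.0.0.1:1024; the per-protocol callback
 * swaps the protocol-specific parts (here, the ports). */
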
/* ip_conntrack_expect helper functions */
static void unlink_expect(struct ip_conntrack_expect *exp)
{
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
	IP_NF_ASSERT(!timer_pending(&exp->timeout));
	list_del(&exp->list);
	CONNTRACK_STAT_INC(expect_delete);
	exp->master->expecting--;
	ip_conntrack_expect_put(exp);
}

void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp)
{
	unlink_expect(exp);
	ip_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&ip_conntrack_lock);
	unlink_expect(exp);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_expect_put(exp);
}

struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			atomic_inc(&i->use);
			return i;
		}
	}
	return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	read_lock_bh(&ip_conntrack_lock);
	i = __ip_conntrack_expect_find(tuple);
	read_unlock_bh(&ip_conntrack_lock);

	return i;
}

/* If an expectation for this connection is found, it is deleted from the
 * global list, then returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		/* If master is not in hash table yet (ie. packet hasn't left
		   this machine yet), how can other end know about expected?
		   Hence these are not the droids you are looking for (if
		   master ct never got confirmed, we'd hold a reference to it
		   and weird things would happen to future packets). */
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && is_confirmed(i->master)) {
			if (i->flags & IP_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}

/* delete all expectations for this conntrack */
void ip_ct_remove_expectations(struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *i, *tmp;

	/* Optimization: most connections never expect any others. */
	if (ct->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			unlink_expect(i);
			ip_conntrack_expect_put(i);
		}
	}
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
	unsigned int ho, hr;

	DEBUGP("clean_from_lists(%p)\n", ct);
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);

	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all pending expectations */
	ip_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	ip_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	CONNTRACK_STAT_INC(delete);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->master)
		ip_conntrack_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	ip_conntrack_free(ct);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	CONNTRACK_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
		    const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	ASSERT_READ_LOCK(&ip_conntrack_lock);
	return tuplehash_to_ctrack(i) != ignored_conntrack
		&& ip_ct_tuple_equal(tuple, &i->tuple);
}

struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	ASSERT_READ_LOCK(&ip_conntrack_lock);
	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
			CONNTRACK_STAT_INC(found);
			return h;
		}
		CONNTRACK_STAT_INC(searched);
	}

	return NULL;
}

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&ip_conntrack_lock);

	return h;
}

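/* The reference count is taken while the read lock is still held, so the
 * conntrack cannot be freed between the lookup and the caller's use of it;
 * callers release it again with ip_conntrack_put(). */
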
static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++ip_conntrack_next_id;
	list_prepend(&ip_conntrack_hash[hash],
		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_prepend(&ip_conntrack_hash[repl_hash],
		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void ip_conntrack_hash_insert(struct ip_conntrack *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&ip_conntrack_lock);
	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&ip_conntrack_lock);
}

/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = ip_conntrack_get(*pskb, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means noone else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&ip_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&ip_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct ip_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct ip_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		/* Remove from unconfirmed list */
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

		__ip_conntrack_hash_insert(ct, hash, repl_hash);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		CONNTRACK_STAT_INC(insert);
		write_unlock_bh(&ip_conntrack_lock);

		if (ct->helper)
			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
		ip_conntrack_event_cache(master_ct(ct) ?
					 IPCT_RELATED : IPCT_NEW, *pskb);

		return NF_ACCEPT;
	}

	CONNTRACK_STAT_INC(insert_failed);
	write_unlock_bh(&ip_conntrack_lock);

	return NF_DROP;
}

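/* Design note: a conntrack is allocated on a connection's first packet but
 * only enters the hash table here, after that packet has survived every
 * hook (filtering, NAT, ...).  Packets dropped along the way never confirm
 * their conntrack, so the table only holds connections whose initial packet
 * was actually let through. */
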
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&ip_conntrack_lock);

	return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct = NULL;
	int dropped = 0;

	read_lock_bh(&ip_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
	if (h) {
		ct = tuplehash_to_ctrack(h);
		atomic_inc(&ct->ct_general.use);
	}
	read_unlock_bh(&ip_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		CONNTRACK_STAT_INC(early_drop);
	}
	ip_conntrack_put(ct);
	return dropped;
}

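/* Eviction is deliberately cheap: only the one chain the new connection
 * hashes into is scanned, backwards, so the candidate found is the oldest
 * entry (inserts prepend to the chain head), and only connections that
 * never saw a reply qualify.  ASSURED connections are never early-dropped. */
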
static inline int helper_cmp(const struct ip_conntrack_helper *i,
			     const struct ip_conntrack_tuple *rtuple)
{
	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct ip_conntrack_helper *
__ip_conntrack_helper_find(const struct ip_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct ip_conntrack_helper *,
			 tuple);
}

struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *helper;

	/* need ip_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&ip_conntrack_lock);

	helper = __ip_conntrack_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&ip_conntrack_lock);

	return helper;
}

void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
	module_put(helper->me);
}

struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
	return ip_ct_protos[protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	preempt_disable();
	p = __ip_conntrack_proto_find(protocol);
	if (p) {
		if (!try_module_get(p->me))
			p = &ip_conntrack_generic_protocol;
	}
	preempt_enable();

	return p;
}

void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{
	module_put(p->me);
}

struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
					struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *conntrack;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(conntrack, 0, sizeof(*conntrack));
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	atomic_inc(&ip_conntrack_count);

	return conntrack;
}

void
ip_conntrack_free(struct ip_conntrack *conntrack)
{
	atomic_dec(&ip_conntrack_count);
	kmem_cache_free(ip_conntrack_cachep, conntrack);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
 * failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	struct ip_conntrack_expect *exp;

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
	if (conntrack == NULL || IS_ERR(conntrack))
		return (struct ip_conntrack_tuple_hash *)conntrack;

	if (!protocol->new(conntrack, skb)) {
		ip_conntrack_free(conntrack);
		return NULL;
	}

	write_lock_bh(&ip_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
		       conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place to put it */
		conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		CONNTRACK_STAT_INC(expect_new);
	} else {
		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);

		CONNTRACK_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&ip_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		ip_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
			     &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}

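/* Classification summary: a reply-direction match is ESTABLISHED
 * (+ IS_REPLY); an original-direction match is ESTABLISHED once a reply
 * has been seen, RELATED if this conntrack was expected, NEW otherwise. */
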
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC(error);
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
				     &ctinfo))) {
		/* Not valid part of a connection */
		CONNTRACK_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}

int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return ip_ct_invert_tuple(inverse, orig,
				  __ip_conntrack_proto_find(orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
			       const struct ip_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { a->mask.src.ip & b->mask.src.ip,
		      { a->mask.src.u.all & b->mask.src.u.all } },
		    { a->mask.dst.ip & b->mask.dst.ip,
		      { a->mask.dst.u.all & b->mask.dst.u.all },
		      a->mask.dst.protonum & b->mask.dst.protonum } };

	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

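/* Example: two expectations that both wildcard the source port but pin the
 * same destination address, port and protocol agree everywhere on the
 * intersection of their masks, so the comparison above reports a clash. */
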
static inline int expect_matches(const struct ip_conntrack_expect *a,
				 const struct ip_conntrack_expect *b)
{
	return a->master == b->master
		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
		&& ip_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
	struct ip_conntrack_expect *i;

	write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			unlink_expect(i);
			write_unlock_bh(&ip_conntrack_lock);
			ip_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&ip_conntrack_lock);
}

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}
	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(ip_conntrack_expect_cachep, exp);
}

static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
	atomic_inc(&exp->use);
	exp->master->expecting++;
	list_add(&exp->list, &ip_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++ip_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	CONNTRACK_STAT_INC(expect_create);
}

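/* Reference accounting: allocation gave the creator one reference; the two
 * atomic_inc calls above add one for the expect list and one for the running
 * timer.  expectation_timed_out() drops both of the latter (one of them via
 * unlink_expect()), and the creator drops its own with
 * ip_conntrack_expect_put(). */
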
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				unlink_expect(i);
				ip_conntrack_expect_put(i);
			}
			break;
		}
	}
}

static inline int refresh_timer(struct ip_conntrack_expect *i)
{
	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}

int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
	struct ip_conntrack_expect *i;
	int ret;

	DEBUGP("ip_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	write_lock_bh(&ip_conntrack_lock);
	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	/* Will be over limit? */
	if (expect->master->helper->max_expected &&
	    expect->master->expecting >= expect->master->helper->max_expected)
		evict_oldest_expect(expect->master);

	ip_conntrack_expect_insert(expect);
	ip_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&ip_conntrack_lock);
	return ret;
}

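/* Typical helper usage (illustrative sketch, not lifted from a specific
 * helper module):
 *
 *	exp = ip_conntrack_expect_alloc(ct);
 *	exp->tuple = ...;		// expected reply-direction endpoints
 *	exp->mask = ...;		// e.g. wildcard the source port
 *	exp->expectfn = NULL;
 *	ret = ip_conntrack_expect_related(exp);
 *	ip_conntrack_expect_put(exp);	// drop the creator's reference
 */
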
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			      const struct ip_conntrack_tuple *newreply)
{
	write_lock_bh(&ip_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && conntrack->expecting == 0)
		conntrack->helper = __ip_conntrack_helper_find(newreply);
	write_unlock_bh(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	BUG_ON(me->timeout == 0);
	write_lock_bh(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	write_unlock_bh(&ip_conntrack_lock);

	return 0;
}

struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *name)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}

	return NULL;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (tuplehash_to_ctrack(i)->helper == me) {
		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
		tuplehash_to_ctrack(i)->helper = NULL;
	}
	return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;
	struct ip_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
		if (exp->master->helper == me && del_timer(&exp->timeout)) {
			unlink_expect(exp);
			ip_conntrack_expect_put(exp);
		}
	}
	/* Get rid of expecteds, set helpers to NULL. */
	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	write_unlock_bh(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	synchronize_net();
}

static inline void ct_add_counters(struct ip_conntrack *ct,
				   enum ip_conntrack_info ctinfo,
				   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
	if (skb) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
					ntohs(skb->nh.iph->tot_len);
	}
#endif
}

/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
void ip_ct_refresh_acct(struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			const struct sk_buff *skb,
			unsigned long extra_jiffies)
{
	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		ct_add_counters(ct, ctinfo, skb);
	} else {
		write_lock_bh(&ip_conntrack_lock);
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			ip_conntrack_event_cache(IPCT_REFRESH, skb);
		}
		ct_add_counters(ct, ctinfo, skb);
		write_unlock_bh(&ip_conntrack_lock);
	}
}

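/* Note the unconfirmed case above stores a relative timeout:
 * __ip_conntrack_confirm() later adds jiffies and arms the timer, so the
 * countdown starts at confirmation time, not at creation time. */
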
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct ip_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct ip_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	t->src.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
	skb_orphan(skb);

	local_bh_disable();
	skb = ip_defrag(skb, user);
	local_bh_enable();

	if (skb)
		ip_send_check(skb->nh.iph);

	return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = ip_conntrack_get(skb, &ctinfo);

	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
	int (*iter)(struct ip_conntrack *i, void *data),
	void *data)
{
	return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h = NULL;

	write_lock_bh(&ip_conntrack_lock);
	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
				struct ip_conntrack_tuple_hash *, iter, data);
		if (h)
			break;
	}
	if (!h)
		h = LIST_FIND_W(&unconfirmed, do_iter,
				struct ip_conntrack_tuple_hash *, iter, data);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	write_unlock_bh(&ip_conntrack_lock);

	return h;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		ip_conntrack_put(ct);
	}
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);

		sin.sin_family = AF_INET;
		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(ct);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}

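/* Userspace side (illustrative): a transparent proxy that accepted a
 * REDIRECTed connection can recover the pre-NAT destination with
 *
 *	struct sockaddr_in dst;
 *	socklen_t dstlen = sizeof(dst);
 *	getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &dstlen);
 */
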
static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};

static int kill_all(struct ip_conntrack *i, void *data)
{
	return 1;
}

static void free_conntrack_hash(void)
{
	if (ip_conntrack_vmalloc)
		vfree(ip_conntrack_hash);
	else
		free_pages((unsigned long)ip_conntrack_hash,
			   get_order(sizeof(struct list_head)
				     * ip_conntrack_htable_size));
}

void ip_conntrack_flush(void)
{
	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	ip_ct_event_cache_flush();
 i_see_dead_people:
	ip_ct_iterate_cleanup(kill_all, NULL);
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to ip_conntrack_untracked are dropped */
	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
		schedule();
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	ip_ct_attach = NULL;
	ip_conntrack_flush();
	kmem_cache_destroy(ip_conntrack_cachep);
	kmem_cache_destroy(ip_conntrack_expect_cachep);
	free_conntrack_hash();
	nf_unregister_sockopt(&so_getorigdst);
}

static int hashsize;
module_param(hashsize, int, 0400);

int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (hashsize) {
		ip_conntrack_htable_size = hashsize;
	} else {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;

	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

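	/* Worked example (assuming a 32-bit box with an 8-byte struct
	 * list_head): a 32 MB machine gives 33554432 / 16384 / 8 = 256
	 * buckets, hence ip_conntrack_max = 8 * 256 = 2048 connections. */
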
	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	/* AK: the hash table is twice as big as needed because it
	   uses list_head.  It would be much nicer for the cache if we
	   used a single-pointer list head here. */
	ip_conntrack_vmalloc = 0;

	ip_conntrack_hash
		= (void *)__get_free_pages(GFP_KERNEL,
					   get_order(sizeof(struct list_head)
						     * ip_conntrack_htable_size));
	if (!ip_conntrack_hash) {
		ip_conntrack_vmalloc = 1;
		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
		ip_conntrack_hash = vmalloc(sizeof(struct list_head)
					    * ip_conntrack_htable_size);
	}
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						0, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
					sizeof(struct ip_conntrack_expect),
					0, 0, NULL, NULL);
	if (!ip_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < MAX_IP_CT_PROTO; i++)
		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
	/* Sew in builtin protocols. */
	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
	write_unlock_bh(&ip_conntrack_lock);

	for (i = 0; i < ip_conntrack_htable_size; i++)
		INIT_LIST_HEAD(&ip_conntrack_hash[i]);

	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;

	/* Set up fake conntrack:
	   - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/* - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	free_conntrack_hash();
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}