#include <linux/netdevice.h>
#include <linux/socket.h>
-/* This rwlock protects the main hash table, protocol/helper/expected
- registrations, conntrack timers*/
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL_GPL(nf_conntrack_count);
void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
-unsigned int nf_conntrack_htable_size __read_mostly = 0;
+unsigned int nf_conntrack_htable_size __read_mostly;
int nf_conntrack_max __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_max);
struct list_head *nf_conntrack_hash __read_mostly;
-struct nf_conn nf_conntrack_untracked;
+struct nf_conn nf_conntrack_untracked __read_mostly;
unsigned int nf_ct_log_invalid __read_mostly;
LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc __read_mostly;
/* This avoids calling kmem_cache_create() with same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);
-extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
-struct nf_conntrack_protocol *
-__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
-{
- if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
- return &nf_conntrack_generic_protocol;
-
- return nf_ct_protos[l3proto][protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct nf_conntrack_protocol *
-nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
-{
- struct nf_conntrack_protocol *p;
-
- preempt_disable();
- p = __nf_ct_proto_find(l3proto, protocol);
- if (!try_module_get(p->me))
- p = &nf_conntrack_generic_protocol;
- preempt_enable();
-
- return p;
-}
-
-void nf_ct_proto_put(struct nf_conntrack_protocol *p)
-{
- module_put(p->me);
-}
-
-struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- preempt_disable();
- p = __nf_ct_l3proto_find(l3proto);
- if (!try_module_get(p->me))
- p = &nf_conntrack_generic_l3proto;
- preempt_enable();
-
- return p;
-}
-
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
-{
- module_put(p->me);
-}
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
- int ret;
- struct nf_conntrack_l3proto *p;
-
-retry: p = nf_ct_l3proto_find_get(l3proto);
- if (p == &nf_conntrack_generic_l3proto) {
- ret = request_module("nf_conntrack-%d", l3proto);
- if (!ret)
- goto retry;
-
- return -EPROTOTYPE;
- }
-
- return 0;
-}
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- preempt_disable();
- p = __nf_ct_l3proto_find(l3proto);
- preempt_enable();
-
- module_put(p->me);
-}
-
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
u_int8_t protonum,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
- const struct nf_conntrack_protocol *protocol)
+ const struct nf_conntrack_l4proto *l4proto)
{
NF_CT_TUPLE_U_BLANK(tuple);
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return protocol->pkt_to_tuple(skb, dataoff, tuple);
+ return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
- const struct nf_conntrack_protocol *protocol)
+ const struct nf_conntrack_l4proto *l4proto)
{
NF_CT_TUPLE_U_BLANK(inverse);
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
- return protocol->invert_tuple(inverse, orig);
+ return l4proto->invert_tuple(inverse, orig);
}
static void
clean_from_lists(struct nf_conn *ct)
{
DEBUGP("clean_from_lists(%p)\n", ct);
- ASSERT_WRITE_LOCK(&nf_conntrack_lock);
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
{
struct nf_conn *ct = (struct nf_conn *)nfct;
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
DEBUGP("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
if (l3proto && l3proto->destroy)
l3proto->destroy(ct);
- proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
- if (proto && proto->destroy)
- proto->destroy(ct);
+ l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+ if (l4proto && l4proto->destroy)
+ l4proto->destroy(ct);
if (nf_conntrack_destroyed)
nf_conntrack_destroyed(ct);
struct nf_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
- ASSERT_READ_LOCK(&nf_conntrack_lock);
list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple)) {
static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
- const struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ u_int32_t features)
{
struct nf_conn *conntrack = NULL;
- u_int32_t features = 0;
struct nf_conntrack_helper *helper;
if (unlikely(!nf_conntrack_hash_rnd_initted)) {
}
/* find features needed by this conntrack. */
- features = l3proto->get_features(orig);
+ features |= l3proto->get_features(orig);
/* FIXME: protect helper list per RCU */
read_lock_bh(&nf_conntrack_lock);
struct nf_conntrack_l3proto *l3proto;
l3proto = __nf_ct_l3proto_find(orig->src.l3num);
- return __nf_conntrack_alloc(orig, repl, l3proto);
+ return __nf_conntrack_alloc(orig, repl, l3proto, 0);
}
void nf_conntrack_free(struct nf_conn *conntrack)
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_protocol *protocol,
+ struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff)
{
struct nf_conn *conntrack;
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_expect *exp;
+ u_int32_t features = 0;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
DEBUGP("Can't invert tuple.\n");
return NULL;
}
- conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
+ read_lock_bh(&nf_conntrack_lock);
+ exp = __nf_conntrack_expect_find(tuple);
+ if (exp && exp->helper)
+ features = NF_CT_F_HELP;
+ read_unlock_bh(&nf_conntrack_lock);
+
+ conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features);
if (conntrack == NULL || IS_ERR(conntrack)) {
DEBUGP("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)conntrack;
}
- if (!protocol->new(conntrack, skb, dataoff)) {
+ if (!l4proto->new(conntrack, skb, dataoff)) {
nf_conntrack_free(conntrack);
DEBUGP("init conntrack: can't track with proto module\n");
return NULL;
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = exp->master;
+ if (exp->helper)
+ nfct_help(conntrack)->helper = exp->helper;
#ifdef CONFIG_NF_CONNTRACK_MARK
conntrack->mark = exp->master->mark;
#endif
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_protocol *proto,
+ struct nf_conntrack_l4proto *l4proto,
int *set_reply,
enum ip_conntrack_info *ctinfo)
{
if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
dataoff, l3num, protonum, &tuple, l3proto,
- proto)) {
+ l4proto)) {
DEBUGP("resolve_normal_ct: Can't get tuple\n");
return NULL;
}
/* look for tuple match */
h = nf_conntrack_find_get(&tuple, NULL);
if (!h) {
- h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
+ h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
unsigned int dataoff;
u_int8_t protonum;
int set_reply = 0;
return -ret;
}
- proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
+ l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);
/* It may be an special packet, error, unclean...
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
- if (proto->error != NULL &&
- (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+ if (l4proto->error != NULL &&
+ (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC(error);
NF_CT_STAT_INC(invalid);
return -ret;
}
- ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
+ ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
&set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_ASSERT((*pskb)->nfct);
- ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+ ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
{
return nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num),
- __nf_ct_proto_find(orig->src.l3num,
+ __nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
}
ct->timeout.expires = extra_jiffies;
event = IPCT_REFRESH;
} else {
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
+ unsigned long newtime = jiffies + extra_jiffies;
+
+ /* Only update the timeout if the new timeout is at least
+ HZ jiffies from the old timeout. Need del_timer for race
+ avoidance (may already be dying). */
+ if (newtime - ct->timeout.expires >= HZ
+ && del_timer(&ct->timeout)) {
+ ct->timeout.expires = newtime;
add_timer(&ct->timeout);
event = IPCT_REFRESH;
}
ct->counters[CTINFO2DIR(ctinfo)].packets++;
ct->counters[CTINFO2DIR(ctinfo)].bytes +=
skb->len - (unsigned int)(skb->nh.raw - skb->data);
- if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
- || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
- event |= IPCT_COUNTER_FILLING;
+
+ if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
+ || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
+ event |= IPCT_COUNTER_FILLING;
}
#endif
if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
return -EINVAL;
- t->src.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
- t->dst.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+ t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+ t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
return 0;
}
free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
nf_conntrack_htable_size);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
+
/* free l3proto protocol tables */
for (i = 0; i < PF_MAX; i++)
if (nf_ct_protos[i]) {
goto err_free_conntrack_slab;
}
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+ if (ret < 0)
+ goto out_free_expect_slab;
+
/* Don't NEED lock here, but good form anyway. */
write_lock_bh(&nf_conntrack_lock);
- for (i = 0; i < PF_MAX; i++)
- nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
+ for (i = 0; i < AF_MAX; i++)
+ nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
write_unlock_bh(&nf_conntrack_lock);
/* For use by REJECT target */
return ret;
+out_free_expect_slab:
+ kmem_cache_destroy(nf_conntrack_expect_cachep);
err_free_conntrack_slab:
nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash: