X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Fip_fragment.c;h=2143bf30597a84b528e0cced57bc657dbe577417;hb=c5b875e354a54e2b5ba24eecae69bf94e025edd5;hp=321e694b72e88ceedb1e0dedcbd695379eb45224;hpb=7eb95156d9dce2f59794264db336ce007d71638b;p=linux-2.6

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 321e694b72..2143bf3059 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,21 +50,8 @@
  * as well. Or notify me, at least. --ANK
  */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
 int sysctl_ipfrag_max_dist __read_mostly = 64;
 
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
 struct ipfrag_skb_cb
 {
 	struct inet_skb_parm	h;
@@ -87,6 +74,25 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
+	 * by TTL.
+	 */
+	.timeout	 = IP_FRAG_TIME,
+	.secret_interval = 10 * 60 * HZ,
+};
+
 static struct inet_frags ip4_frags;
 
 int ip_frag_nqueues(void)
@@ -102,19 +108,10 @@ int ip_frag_mem(void)
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev);
 
-static __inline__ void __ipq_unlink(struct ipq *qp)
-{
-	hlist_del(&qp->q.list);
-	list_del(&qp->q.lru_list);
-	ip4_frags.nqueues--;
-}
-
-static __inline__ void ipq_unlink(struct ipq *ipq)
-{
-	write_lock(&ip4_frags.lock);
-	__ipq_unlink(ipq);
-	write_unlock(&ip4_frags.lock);
-}
+struct ip4_create_arg {
+	struct iphdr *iph;
+	u32 user;
+};
 
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
@@ -123,34 +120,25 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 		    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct ipq *ipq;
 
-	write_lock(&ip4_frags.lock);
-	get_random_bytes(&ip4_frags.rnd, sizeof(u32));
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct ipq *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
-			unsigned int hval = ipqhashfn(q->id, q->saddr,
-						      q->daddr, q->protocol);
-
-			if (hval != i) {
-				hlist_del(&q->q.list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
-			}
-		}
-	}
-	write_unlock(&ip4_frags.lock);
+	ipq = container_of(q, struct ipq, q);
+	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
+}
 
-	mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
+static int ip4_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct ipq *qp;
+	struct ip4_create_arg *arg = a;
+
+	qp = container_of(q, struct ipq, q);
+	return (qp->id == arg->iph->id &&
+			qp->saddr == arg->iph->saddr &&
+			qp->daddr == arg->iph->daddr &&
+			qp->protocol == arg->iph->protocol &&
+			qp->user == arg->user);
 }
 
 /* Memory Tracking Functions. */
@@ -162,55 +150,35 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
-	if (work)
-		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
-	kfree(qp);
+	struct ipq *qp = container_of(q, struct ipq, q);
+	struct ip4_create_arg *arg = a;
+
+	qp->protocol = arg->iph->protocol;
+	qp->id = arg->iph->id;
+	qp->saddr = arg->iph->saddr;
+	qp->daddr = arg->iph->daddr;
+	qp->user = arg->user;
+	qp->peer = sysctl_ipfrag_max_dist ?
+		inet_getpeer(arg->iph->saddr, 1) : NULL;
 }
 
-static __inline__ struct ipq *frag_alloc_queue(void)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
-
-	if (!qp)
-		return NULL;
-	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
-	return qp;
-}
-
-
-/* Destruction primitives. */
-
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(qp->q.last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->q.timer) == 0);
+	struct ipq *qp;
 
+	qp = container_of(q, struct ipq, q);
 	if (qp->peer)
 		inet_putpeer(qp->peer);
+}
 
-	/* Release all fragment data. */
-	fp = qp->q.fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
 
-	/* Finally, release the queue descriptor itself. */
-	frag_free_queue(qp, work);
-}
+/* Destruction primitives. */
 
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
-	if (atomic_dec_and_test(&ipq->q.refcnt))
-		ip_frag_destroy(ipq, work);
+	inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -218,14 +186,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->q.timer))
-		atomic_dec(&ipq->q.refcnt);
-
-	if (!(ipq->q.last_in & COMPLETE)) {
-		ipq_unlink(ipq);
-		atomic_dec(&ipq->q.refcnt);
-		ipq->q.last_in |= COMPLETE;
-	}
+	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments. Evictor trashes the oldest
@@ -233,33 +194,11 @@ static void ipq_kill(struct ipq *ipq)
  */
 static void ip_evictor(void)
 {
-	struct ipq *qp;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while (work > 0) {
-		read_lock(&ip4_frags.lock);
-		if (list_empty(&ip4_frags.lru_list)) {
-			read_unlock(&ip4_frags.lock);
-			return;
-		}
-		tmp = ip4_frags.lru_list.next;
-		qp = list_entry(tmp, struct ipq, q.lru_list);
-		atomic_inc(&qp->q.refcnt);
-		read_unlock(&ip4_frags.lock);
+	int evicted;
 
-		spin_lock(&qp->q.lock);
-		if (!(qp->q.last_in&COMPLETE))
-			ipq_kill(qp);
-		spin_unlock(&qp->q.lock);
-
-		ipq_put(qp, &work);
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip4_frags);
+	if (evicted)
+		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -267,7 +206,9 @@ static void ip_evictor(void)
  */
 static void ip_expire(unsigned long arg)
 {
-	struct ipq *qp = (struct ipq *) arg;
+	struct ipq *qp;
+
+	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
 
 	spin_lock(&qp->q.lock);
 
@@ -289,120 +230,33 @@ static void ip_expire(unsigned long arg)
 	}
 out:
 	spin_unlock(&qp->q.lock);
-	ipq_put(qp, NULL);
+	ipq_put(qp);
 }
 
-/* Creation primitives. */
-
-static struct ipq *ip_frag_intern(struct ipq *qp_in)
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
-	struct ipq *qp;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
+	struct inet_frag_queue *q;
+	struct ip4_create_arg arg;
 	unsigned int hash;
 
-	write_lock(&ip4_frags.lock);
-	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
-			 qp_in->protocol);
-#ifdef CONFIG_SMP
-	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
-	 */
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == qp_in->id &&
-		    qp->saddr == qp_in->saddr &&
-		    qp->daddr == qp_in->daddr &&
-		    qp->protocol == qp_in->protocol &&
-		    qp->user == qp_in->user) {
-			atomic_inc(&qp->q.refcnt);
-			write_unlock(&ip4_frags.lock);
-			qp_in->q.last_in |= COMPLETE;
-			ipq_put(qp_in, NULL);
-			return qp;
-		}
-	}
-#endif
-	qp = qp_in;
-
-	if (!mod_timer(&qp->q.timer, jiffies + sysctl_ipfrag_time))
-		atomic_inc(&qp->q.refcnt);
-
-	atomic_inc(&qp->q.refcnt);
-	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
-	INIT_LIST_HEAD(&qp->q.lru_list);
-	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
-	ip4_frags.nqueues++;
-	write_unlock(&ip4_frags.lock);
-	return qp;
-}
-
-/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
-{
-	struct ipq *qp;
+	arg.iph = iph;
+	arg.user = user;
+	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
-	if ((qp = frag_alloc_queue()) == NULL)
+	q = inet_frag_find(&ip4_frags, &arg, hash);
+	if (q == NULL)
 		goto out_nomem;
 
-	qp->protocol = iph->protocol;
-	qp->q.last_in = 0;
-	qp->id = iph->id;
-	qp->saddr = iph->saddr;
-	qp->daddr = iph->daddr;
-	qp->user = user;
-	qp->q.len = 0;
-	qp->q.meat = 0;
-	qp->q.fragments = NULL;
-	qp->iif = 0;
-	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
-
-	/* Initialize a timer for this entry. */
-	init_timer(&qp->q.timer);
-	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue	*/
-	qp->q.timer.function = ip_expire;	/* expire function	*/
-	spin_lock_init(&qp->q.lock);
-	atomic_set(&qp->q.refcnt, 1);
-
-	return ip_frag_intern(qp);
+	return container_of(q, struct ipq, q);
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
 	return NULL;
 }
 
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-{
-	__be16 id = iph->id;
-	__be32 saddr = iph->saddr;
-	__be32 daddr = iph->daddr;
-	__u8 protocol = iph->protocol;
-	unsigned int hash;
-	struct ipq *qp;
-	struct hlist_node *n;
-
-	read_lock(&ip4_frags.lock);
-	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == id &&
-		    qp->saddr == saddr &&
-		    qp->daddr == daddr &&
-		    qp->protocol == protocol &&
-		    qp->user == user) {
-			atomic_inc(&qp->q.refcnt);
-			read_unlock(&ip4_frags.lock);
-			return qp;
-		}
-	}
-	read_unlock(&ip4_frags.lock);
-
-	return ip_frag_create(iph, user);
-}
-
 /* Is the fragment too far ahead to be part of ipq? */
 static inline int ip_frag_too_far(struct ipq *qp)
 {
@@ -432,7 +286,7 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->q.timer, jiffies + sysctl_ipfrag_time)) {
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
 		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
@@ -632,7 +486,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	if (prev) {
 		head = prev->next;
 		fp = skb_clone(head, GFP_ATOMIC);
-
 		if (!fp)
 			goto out_nomem;
 
@@ -658,7 +511,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -714,6 +566,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
 			      "queue %p\n", qp);
+	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
 	if (net_ratelimit())
@@ -733,7 +586,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip4_frags.mem) > sysctl_ipfrag_high_thresh)
+	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
 	/* Lookup (or create) queue header */
@@ -745,7 +598,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 		ret = ip_frag_queue(qp, skb);
 
 		spin_unlock(&qp->q.lock);
-		ipq_put(qp, NULL);
+		ipq_put(qp);
 		return ret;
 	}
 
@@ -756,11 +609,14 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 
 void __init ipfrag_init(void)
 {
-	init_timer(&ip4_frags.secret_timer);
-	ip4_frags.secret_timer.function = ipfrag_secret_rebuild;
-	ip4_frags.secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
-	add_timer(&ip4_frags.secret_timer);
-
+	ip4_frags.ctl = &ip4_frags_ctl;
+	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.constructor = ip4_frag_init;
+	ip4_frags.destructor = ip4_frag_free;
+	ip4_frags.skb_free = NULL;
+	ip4_frags.qsize = sizeof(struct ipq);
+	ip4_frags.match = ip4_frag_match;
+	ip4_frags.frag_expire = ip_expire;
 	inet_frags_init(&ip4_frags);
 }
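
The callbacks registered in ipfrag_init() above are consumed by the protocol-independent reassembly layer introduced by the parent commit (7eb95156, net/ipv4/inet_fragment.c). For orientation, here is a minimal sketch of the lookup-or-create path that ip_find() now delegates to. The exact code lives in inet_fragment.c and is not part of this diff, so treat the shape below, including inet_frag_create() and the locking detail, as an assumption inferred from the call sites in this patch rather than a quote of the shared helper:

/* Sketch (assumed shape) of the generic helper behind ip_find().
 * The caller supplies the bucket hash and an opaque key; all
 * protocol-specific behaviour comes from the inet_frags callbacks.
 */
struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
				       unsigned int hash)
{
	struct inet_frag_queue *q;
	struct hlist_node *n;

	read_lock(&f->lock);
	hlist_for_each_entry(q, n, &f->hash[hash], list) {
		if (f->match(q, key)) {	/* ip4_frag_match() for IPv4 */
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
	}
	read_unlock(&f->lock);

	/* No match: the generic code allocates f->qsize bytes, arms the
	 * queue timer with f->frag_expire (ip_expire() here) and lets
	 * f->constructor (ip4_frag_init()) fill the IPv4-specific fields.
	 */
	return inet_frag_create(f, key, hash);
}

Factoring match and constructor out this way is what lets the patch delete ip_frag_intern()'s SMP re-check and the field-by-field setup in ip_frag_create(): both now happen once, in shared code that the IPv6 and netfilter reassembly paths can reuse.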