#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
+#include <linux/jhash.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/* Variables */
struct timer_list perturb_timer;
- int perturbation;
+ u32 perturbation;
sfq_index tail; /* Index of current slot in round */
sfq_index max_depth; /* Maximal depth */
/*
 * sfq_fold_hash - reduce two 32-bit flow keys to a hash-table index.
 * @q:  scheduler private data; only q->perturbation is read here.
 * @h:  first key word.
 * @h1: second key word.
 *
 * Removed (-) side: h1 is shifted left by the perturbation and right by
 * (0x1F - perturbation), both results are XORed into h, h is folded with
 * h >> 10, and the low 10 bits (mask 0x3FF) are returned.
 *
 * Added (+) side: both words are passed to jhash_2words() with
 * q->perturbation as the third (initval) argument, and the result is
 * masked with SFQ_HASH_DIVISOR - 1.
 * NOTE(review): the mask assumes SFQ_HASH_DIVISOR is a power of two; its
 * definition is not visible in this hunk - confirm.
 */
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
- int pert = q->perturbation;
-
- /* Have we any rotation primitives? If not, WHY? */
- h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
- h ^= h>>10;
- return h & 0x3FF;
+ return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
h = iph->daddr;
- h2 = iph->saddr^iph->protocol;
+ h2 = iph->saddr ^ iph->protocol;
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
(iph->protocol == IPPROTO_TCP ||
iph->protocol == IPPROTO_UDP ||
{
struct ipv6hdr *iph = ipv6_hdr(skb);
h = iph->daddr.s6_addr32[3];
- h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
+ h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
if (iph->nexthdr == IPPROTO_TCP ||
iph->nexthdr == IPPROTO_UDP ||
iph->nexthdr == IPPROTO_UDPLITE ||
break;
}
default:
- h = (u32)(unsigned long)skb->dst^skb->protocol;
- h2 = (u32)(unsigned long)skb->sk;
+ h = (unsigned long)skb->dst ^ skb->protocol;
+ h2 = (unsigned long)skb->sk;
}
+
return sfq_fold_hash(q, h, h2);
}
drop a packet from it */
if (d > 1) {
- sfq_index x = q->dep[d+SFQ_DEPTH].next;
+ sfq_index x = q->dep[d + SFQ_DEPTH].next;
skb = q->qs[x].prev;
len = skb->len;
__skb_unlink(skb, &q->qs[x]);
}
static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned hash = sfq_hash(q, skb);
q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
q->hash[x] = hash;
}
+
+ /* If the selected queue has length q->limit, this means that
+ * all other queues are empty and that we do a simple tail drop,
+ * i.e. drop _this_ packet.
+ */
+ if (q->qs[x].qlen >= q->limit)
+ return qdisc_drop(skb, sch);
+
sch->qstats.backlog += skb->len;
__skb_queue_tail(&q->qs[x], skb);
sfq_inc(q, x);
q->tail = x;
}
}
- if (++sch->q.qlen < q->limit-1) {
+ if (++sch->q.qlen <= q->limit) {
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
return 0;
}
static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned hash = sfq_hash(q, skb);
q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
q->hash[x] = hash;
}
+
sch->qstats.backlog += skb->len;
__skb_queue_head(&q->qs[x], skb);
+ /* If the selected queue has length q->limit+1, this means that
+ * all other queues are empty and we do a simple tail drop.
+ * This packet is still requeued at the head of the queue; the tail
+ * packet is dropped.
+ */
+ if (q->qs[x].qlen > q->limit) {
+ skb = q->qs[x].prev;
+ __skb_unlink(skb, &q->qs[x]);
+ sch->qstats.drops++;
+ sch->qstats.backlog -= skb->len;
+ kfree_skb(skb);
+ return NET_XMIT_CN;
+ }
+
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
if (q->tail == SFQ_DEPTH) { /* It is the first flow */
q->tail = x;
}
}
- if (++sch->q.qlen < q->limit - 1) {
+
+ if (++sch->q.qlen <= q->limit) {
sch->qstats.requeues++;
return 0;
}
static struct sk_buff *
-sfq_dequeue(struct Qdisc* sch)
+sfq_dequeue(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
}
static void
-sfq_reset(struct Qdisc* sch)
+sfq_reset(struct Qdisc *sch)
{
struct sk_buff *skb;
/*
 * sfq_perturbation - handler installed as q->perturb_timer.function.
 * @arg: the owning Qdisc pointer, passed as an unsigned long and cast back.
 *
 * Stores a fresh net_random() value in q->perturbation, which
 * sfq_fold_hash() reads, and re-arms the timer when q->perturb_period is
 * non-zero.
 *
 * Removed (-) side: the random value was masked to 5 bits (0x1F) because
 * it was used as a shift count, and the timer was re-armed by setting
 * .expires and calling add_timer().
 * Added (+) side: the full random value is kept and the timer is re-armed
 * with a single mod_timer() call.
 */
static void sfq_perturbation(unsigned long arg)
{
- struct Qdisc *sch = (struct Qdisc*)arg;
+ struct Qdisc *sch = (struct Qdisc *)arg;
struct sfq_sched_data *q = qdisc_priv(sch);
- q->perturbation = net_random()&0x1F;
+ q->perturbation = net_random();
/* A perturb_period of zero leaves the timer unarmed after this run. */
- if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
- }
+ if (q->perturb_period)
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}
static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
sch_tree_lock(sch);
q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
- q->perturb_period = ctl->perturb_period*HZ;
+ q->perturb_period = ctl->perturb_period * HZ;
if (ctl->limit)
- q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
qlen = sch->q.qlen;
- while (sch->q.qlen >= q->limit-1)
+ while (sch->q.qlen > q->limit)
sfq_drop(sch);
qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ q->perturbation = net_random();
}
sch_tree_unlock(sch);
return 0;
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
- init_timer(&q->perturb_timer);
- q->perturb_timer.data = (unsigned long)sch;
q->perturb_timer.function = sfq_perturbation;
+ q->perturb_timer.data = (unsigned long)sch;;
+ init_timer_deferrable(&q->perturb_timer);
- for (i=0; i<SFQ_HASH_DIVISOR; i++)
+ for (i = 0; i < SFQ_HASH_DIVISOR; i++)
q->ht[i] = SFQ_DEPTH;
- for (i=0; i<SFQ_DEPTH; i++) {
+
+ for (i = 0; i < SFQ_DEPTH; i++) {
skb_queue_head_init(&q->qs[i]);
- q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
- q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+ q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
+ q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
}
- q->limit = SFQ_DEPTH;
+
+ q->limit = SFQ_DEPTH - 1;
q->max_depth = 0;
q->tail = SFQ_DEPTH;
if (opt == NULL) {
q->quantum = psched_mtu(sch->dev);
q->perturb_period = 0;
+ q->perturbation = net_random();
} else {
int err = sfq_change(sch, opt);
if (err)
return err;
}
- for (i=0; i<SFQ_DEPTH; i++)
+
+ for (i = 0; i < SFQ_DEPTH; i++)
sfq_link(q, i);
return 0;
}
struct tc_sfq_qopt opt;
opt.quantum = q->quantum;
- opt.perturb_period = q->perturb_period/HZ;
+ opt.perturb_period = q->perturb_period / HZ;
opt.limit = q->limit;
opt.divisor = SFQ_HASH_DIVISOR;
return -1;
}
-static struct Qdisc_ops sfq_qdisc_ops = {
+static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.next = NULL,
.cl_ops = NULL,
.id = "sfq",