* from Ren Liu
* - More error checks
*
- *
- *
- * For all the glorious comments look at Alexey's sch_red.c
+ * For all the glorious comments look at include/net/red.h
*/
-#include <linux/config.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/red.h>
-#if 1 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
#define GRED_DEF_PRIO (MAX_DPs / 2)
+/* Mask applied to skb->tc_index to obtain a virtual queue (DP) index.
+ * NOTE(review): (MAX_DPs - 1) is only a contiguous bit mask if MAX_DPs is
+ * a power of two - confirm against its definition.
+ */
+#define GRED_VQ_MASK (MAX_DPs - 1)
struct gred_sched_data;
struct gred_sched;
u32 bytesin; /* bytes seen on virtualQ so far*/
u32 packetsin; /* packets seen on virtualQ so far*/
u32 backlog; /* bytes on the virtualQ */
- u8 prio; /* the prio of this vq */
+ u8 prio; /* the prio of this vq */
struct red_parms parms;
struct red_stats stats;
{
struct gred_sched_data *tab[MAX_DPs];
unsigned long flags;
- u32 DPs;
- u32 def;
- u8 initd;
+ u32 red_flags;
+ u32 DPs;
+ u32 def;
+ struct red_parms wred_set;
};
static inline int gred_wred_mode(struct gred_sched *table)
return q->backlog;
}
-static int
-gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+/* Return the virtual queue (DP) index encoded in skb->tc_index, i.e. the
+ * bits of tc_index selected by GRED_VQ_MASK.
+ */
+static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
- struct gred_sched_data *q=NULL;
- struct gred_sched *t= qdisc_priv(sch);
- unsigned long qavg = 0;
- int i=0;
+ return skb->tc_index & GRED_VQ_MASK;
+}
- if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
- D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
- goto do_enqueue;
- }
+/* Copy the shared WRED state kept in table->wred_set (qavg and qidlestart)
+ * into the RED parameters of virtual queue q.
+ */
+static inline void gred_load_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ q->parms.qavg = table->wred_set.qavg;
+ q->parms.qidlestart = table->wred_set.qidlestart;
+}
+
+/* Write the RED averaging state of virtual queue q back to the shared
+ * WRED set in table->wred_set.
+ *
+ * Both fields that gred_load_wred_set() reads are stored. If qidlestart
+ * were left out, an idle period started on wred_set would be reloaded
+ * into every virtual queue on each enqueue and could never be ended,
+ * because ending it only updates q->parms.
+ */
+static inline void gred_store_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ table->wred_set.qavg = q->parms.qavg;
+ table->wred_set.qidlestart = q->parms.qidlestart;
+}
+/* Non-zero when TC_RED_ECN is set in the qdisc-wide red_flags. */
+static inline int gred_use_ecn(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_ECN;
+}
+
+/* Non-zero when TC_RED_HARDDROP is set in the qdisc-wide red_flags. */
+static inline int gred_use_harddrop(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_HARDDROP;
+}
- if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
- printk("GRED: setting to default (%d)\n ",t->def);
- if (!(q=t->tab[t->def])) {
- DPRINTK("GRED: setting to default FAILED! dropping!! "
- "(%d)\n ", t->def);
- goto drop;
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct gred_sched_data *q=NULL;
+ struct gred_sched *t= qdisc_priv(sch);
+ unsigned long qavg = 0;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ dp = t->def;
+
+ if ((q = t->tab[dp]) == NULL) {
+ /* Pass through packets not assigned to a DP
+ * if no default DP has been configured. This
+ * allows for DP flows to be left untouched.
+ */
+ if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
+ return qdisc_enqueue_tail(skb, sch);
+ else
+ goto drop;
}
+
/* fix tc_index? --could be controvesial but needed for
requeueing */
- skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
+ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
}
- D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
- "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
- sch->qstats.backlog);
- /* sum up all the qaves of prios <= to ours to get the new qave*/
+ /* sum up all the qaves of prios <= to ours to get the new qave */
if (!gred_wred_mode(t) && gred_rio_mode(t)) {
- for (i=0;i<t->DPs;i++) {
- if ((!t->tab[i]) || (i==q->DP))
- continue;
-
- if (t->tab[i]->prio < q->prio &&
+ int i;
+
+ for (i = 0; i < t->DPs; i++) {
+ if (t->tab[i] && t->tab[i]->prio < q->prio &&
!red_is_idling(&t->tab[i]->parms))
qavg +=t->tab[i]->parms.qavg;
}
-
+
}
q->packetsin++;
- q->bytesin+=skb->len;
+ q->bytesin += skb->len;
- if (gred_wred_mode(t)) {
- qavg = 0;
- q->parms.qavg = t->tab[t->def]->parms.qavg;
- q->parms.qidlestart = t->tab[t->def]->parms.qidlestart;
- }
+ if (gred_wred_mode(t))
+ gred_load_wred_set(t, q);
q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
red_end_of_idle_period(&q->parms);
if (gred_wred_mode(t))
- t->tab[t->def]->parms.qavg = q->parms.qavg;
+ gred_store_wred_set(t, q);
switch (red_action(&q->parms, q->parms.qavg + qavg)) {
case RED_DONT_MARK:
case RED_PROB_MARK:
sch->qstats.overlimits++;
- q->stats.prob_drop++;
- goto drop;
+ if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
+
+ q->stats.prob_mark++;
+ break;
case RED_HARD_MARK:
sch->qstats.overlimits++;
- q->stats.forced_drop++;
- goto drop;
+ if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+ q->stats.forced_mark++;
+ break;
}
if (q->backlog + skb->len <= q->limit) {
q->backlog += skb->len;
-do_enqueue:
- __skb_queue_tail(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->bstats.bytes += skb->len;
- sch->bstats.packets++;
- return 0;
+ return qdisc_enqueue_tail(skb, sch);
}
q->stats.pdrop++;
drop:
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
+ return qdisc_drop(skb, sch);
+
+congestion_drop:
+ qdisc_drop(skb, sch);
+ return NET_XMIT_CN;
}
-static int
-gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
+/* Hand skb back to the qdisc via qdisc_requeue() and re-add its length to
+ * the backlog of the virtual queue its tc_index maps to. If no such
+ * virtual queue exists, only a rate-limited warning is printed and the
+ * per-VQ backlog is left unadjusted. Returns what qdisc_requeue() returns.
+ */
+static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
+ struct gred_sched *t = qdisc_priv(sch);
struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
- q= t->tab[(skb->tc_index&0xf)];
-/* error checking here -- probably unnecessary */
+ u16 dp = tc_index_to_dp(skb);
- if (red_is_idling(&q->parms))
- red_end_of_idle_period(&q->parms);
+ /* dp out of the configured range, or no VQ allocated for it */
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
+ "for requeue, screwing up backlog.\n",
+ tc_index_to_dp(skb));
+ } else {
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+ q->backlog += skb->len;
+ }
- __skb_queue_head(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->qstats.requeues++;
- q->backlog += skb->len;
- return 0;
+ return qdisc_requeue(skb, sch);
}
-static struct sk_buff *
-gred_dequeue(struct Qdisc* sch)
+/* Take a packet from the qdisc via qdisc_dequeue_head() and subtract its
+ * length from the backlog of the virtual queue its tc_index maps to.
+ * Returns the skb, or NULL when nothing was dequeued.
+ */
+static struct sk_buff *gred_dequeue(struct Qdisc* sch)
{
struct sk_buff *skb;
- struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
+ struct gred_sched *t = qdisc_priv(sch);
+
+ skb = qdisc_dequeue_head(sch);
- skb = __skb_dequeue(&sch->q);
if (skb) {
- sch->qstats.backlog -= skb->len;
- q= t->tab[(skb->tc_index&0xf)];
- if (q) {
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ /* dp out of the configured range, or no VQ allocated for it */
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ 0x%x after dequeue, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
+ } else {
q->backlog -= skb->len;
+
+ /* per-VQ idle period is only tracked outside WRED mode */
if (!q->backlog && !gred_wred_mode(t))
red_start_of_idle_period(&q->parms);
- } else {
- D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
}
+
return skb;
}
- if (gred_wred_mode(t)) {
- q= t->tab[t->def];
- if (!q)
- D2PRINTK("no default VQ set: Results will be "
- "screwed up\n");
- else
- red_start_of_idle_period(&q->parms);
- }
+ /* nothing dequeued: in WRED mode start idling on the shared set,
+ * unless it is already idling
+ */
+ if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
+ red_start_of_idle_period(&t->wred_set);
return NULL;
}
+/* Drop one packet obtained from qdisc_dequeue_tail(). Its length is
+ * subtracted from the backlog of the virtual queue its tc_index maps to
+ * and that queue's stats.other counter is incremented. Returns the length
+ * of the dropped packet, or 0 when there was no packet to drop.
+ */
static unsigned int gred_drop(struct Qdisc* sch)
{
struct sk_buff *skb;
+ struct gred_sched *t = qdisc_priv(sch);
- struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
-
- skb = __skb_dequeue_tail(&sch->q);
+ skb = qdisc_dequeue_tail(sch);
if (skb) {
unsigned int len = skb->len;
- sch->qstats.backlog -= len;
- sch->qstats.drops++;
- q= t->tab[(skb->tc_index&0xf)];
- if (q) {
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ /* dp out of the configured range, or no VQ allocated for it */
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ 0x%x while dropping, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
+ } else {
q->backlog -= len;
q->stats.other++;
+
+ /* per-VQ idle period is only tracked outside WRED mode */
if (!q->backlog && !gred_wred_mode(t))
red_start_of_idle_period(&q->parms);
- } else {
- D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
}
- kfree_skb(skb);
+ /* len was saved above because skb is handed to qdisc_drop() here */
+ qdisc_drop(skb, sch);
return len;
}
- q=t->tab[t->def];
- if (!q) {
- D2PRINTK("no default VQ set: Results might be screwed up\n");
- return 0;
- }
+ /* nothing to drop: in WRED mode start idling on the shared set,
+ * unless it is already idling
+ */
+ if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
+ red_start_of_idle_period(&t->wred_set);
- red_start_of_idle_period(&q->parms);
return 0;
}
+/* Reset the qdisc: qdisc_reset_queue() is called on sch, then every
+ * allocated virtual queue gets red_restart() on its RED parameters and
+ * its backlog zeroed. Per-VQ statistics are not cleared here.
+ */
static void gred_reset(struct Qdisc* sch)
{
int i;
- struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
+ struct gred_sched *t = qdisc_priv(sch);
- __skb_queue_purge(&sch->q);
+ qdisc_reset_queue(sch);
- sch->qstats.backlog = 0;
+ for (i = 0; i < t->DPs; i++) {
+ struct gred_sched_data *q = t->tab[i];
+
+ /* slots without an allocated VQ are skipped */
+ if (!q)
+ continue;
- for (i=0;i<t->DPs;i++) {
- q= t->tab[i];
- if (!q)
- continue;
red_restart(&q->parms);
q->backlog = 0;
- q->stats.other = 0;
- q->stats.forced_drop = 0;
- q->stats.prob_drop = 0;
- q->stats.pdrop = 0;
}
}
sch_tree_lock(sch);
table->DPs = sopt->DPs;
table->def = sopt->def_DP;
+ table->red_flags = sopt->flags;
/*
* Every entry point to GRED is synchronized with the above code
}
}
- table->initd = 0;
-
return 0;
}
struct gred_sched_data *q;
if (table->tab[dp] == NULL) {
- table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
+ table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
if (table->tab[dp] == NULL)
return -ENOMEM;
- memset(table->tab[dp], 0, sizeof(*q));
}
q = table->tab[dp];
ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
ctl->Scell_log, stab);
- q->stats.other = 0;
- q->stats.forced_drop = 0;
- q->stats.prob_drop = 0;
- q->stats.pdrop = 0;
-
return 0;
}
if (err < 0)
goto errout_locked;
- if (table->tab[table->def] == NULL) {
- if (gred_rio_mode(table))
- prio = table->tab[ctl->DP]->prio;
-
- err = gred_change_vq(sch, table->def, ctl, prio, stab);
- if (err < 0)
- goto errout_locked;
- }
-
- table->initd = 1;
-
if (gred_rio_mode(table)) {
gred_disable_wred_mode(table);
if (gred_wred_mode_check(sch))
.DPs = table->DPs,
.def_DP = table->def,
.grio = gred_rio_mode(table),
+ .flags = table->red_flags,
};
opts = RTA_NEST(skb, TCA_OPTIONS);
struct gred_sched *table = qdisc_priv(sch);
int i;
- for (i = 0;i < table->DPs; i++) {
+ for (i = 0; i < table->DPs; i++) {
if (table->tab[i])
gred_destroy_vq(table->tab[i]);
}
}
static struct Qdisc_ops gred_qdisc_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "gred",
.priv_size = sizeof(struct gred_sched),
.enqueue = gred_enqueue,
{
return register_qdisc(&gred_qdisc_ops);
}
-static void __exit gred_module_exit(void)
+
+/* Module exit hook: unregisters gred_qdisc_ops. */
+static void __exit gred_module_exit(void)
{
unregister_qdisc(&gred_qdisc_ops);
}
+
module_init(gred_module_init)
module_exit(gred_module_exit)
+
MODULE_LICENSE("GPL");