2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
less than MTU, which is always the case when real-time classes
have small rates. Note that the statement of [3] is
incomplete: delay may actually be estimated even if the class
per-round allotment is less than MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
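/* Illustrative configuration (a sketch only; device name, handles and rates
   are placeholders, syntax as in tc-cbq(8) and the classic LARTC examples):

     tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000 cell 8
     tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
        rate 6Mbit weight 0.6Mbit prio 5 allot 1514 cell 8 maxburst 20 \
        avpkt 1000 bounded

   i.e. a root CBQ on a 100Mbit link with one bounded 6Mbit class. */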
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
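/* L2T: look up the transmission time of a packet of the given length
   in the class rate table. */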
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If classifier selected a link sharing class,
* apply the agency-specific classifier.
* Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
A packet has just been enqueued to an empty class.
cbq_activate_class adds it to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
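/* When a class starts transmitting, lower the toplevel cutoff to the level
   of the first underlimit class in its borrow chain (if that level is below
   the current cutoff), so top-level link sharing allows borrowing again. */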
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
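/* Standard enqueue path: classify the packet, hand it to the leaf class
   qdisc, update statistics and, if the class just became backlogged,
   put it on the active list of its priority band. */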
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
380 if (ret & __NET_XMIT_BYPASS)
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
415 if ((cl = q->tx_class) == NULL) {
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
426 cl->q->__parent = sch;
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
430 sch->qstats.requeues++;
432 cbq_activate_class(cl);
435 if (net_xmit_drop_count(ret)) {
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
452 delay += cl->offtime;
455 Class goes to sleep, so that it will have no
456 chance to work avgidle. Let's forgive it 8)
BTW cbq-2.0 has a bug in this place:
apparently they forgot to shift it by cl->ewma_log.
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on
476 real available rate, rather than leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
492 q->wd_expires = base_delay;
496 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
506 if (cl->level > q->toplevel) {
510 } while ((cl = cl->borrow) != NULL);
/* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
525 psched_time_t sched = q->now;
528 delay += cl->offtime;
530 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
531 if (cl->avgidle < cl->minidle)
532 cl->avgidle = cl->minidle;
533 cl->undertime = q->now + delay;
536 sched += delay + cl->penalty;
537 cl->penalized = sched;
538 cl->cpriority = TC_CBQ_MAXPRIO;
539 q->pmask |= (1<<TC_CBQ_MAXPRIO);
541 expires = ktime_set(0, 0);
542 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
543 if (hrtimer_try_to_cancel(&q->delay_timer) &&
544 ktime_to_ns(ktime_sub(q->delay_timer.expires,
546 q->delay_timer.expires = expires;
547 hrtimer_restart(&q->delay_timer);
549 cl->xstats.overactions++;
554 if (q->wd_expires == 0 || q->wd_expires > delay)
555 q->wd_expires = delay;
558 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
560 static void cbq_ovl_lowprio(struct cbq_class *cl)
562 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
564 cl->penalized = q->now + cl->penalty;
566 if (cl->cpriority != cl->priority2) {
567 cl->cpriority = cl->priority2;
568 q->pmask |= (1<<cl->cpriority);
569 cl->xstats.overactions++;
574 /* TC_CBQ_OVL_DROP: penalize class by dropping */
576 static void cbq_ovl_drop(struct cbq_class *cl)
578 if (cl->q->ops->drop)
579 if (cl->q->ops->drop(cl->q))
581 cl->xstats.overactions++;
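/* Walk the active chain of one priority band and move classes whose delay
   penalty has expired back to their normal priority; remember the earliest
   penalty still pending so the delay timer can be rearmed. */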
585 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
588 struct cbq_class *cl;
589 struct cbq_class *cl_prev = q->active[prio];
590 psched_time_t sched = now;
596 cl = cl_prev->next_alive;
597 if (now - cl->penalized > 0) {
598 cl_prev->next_alive = cl->next_alive;
599 cl->next_alive = NULL;
600 cl->cpriority = cl->priority;
602 cbq_activate_class(cl);
604 if (cl == q->active[prio]) {
605 q->active[prio] = cl_prev;
606 if (cl == q->active[prio]) {
607 q->active[prio] = NULL;
612 cl = cl_prev->next_alive;
613 } else if (sched - cl->penalized > 0)
614 sched = cl->penalized;
615 } while ((cl_prev = cl) != q->active[prio]);
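/* Expiry handler of q->delay_timer: release the delayed priority bands whose
   penalties have run out, rearm the timer if some penalties remain, and
   reschedule the qdisc. */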
620 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
622 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
624 struct Qdisc *sch = q->watchdog.qdisc;
626 psched_tdiff_t delay = 0;
629 now = psched_get_time();
635 int prio = ffz(~pmask);
640 tmp = cbq_undelay_prio(q, prio, now);
643 if (tmp < delay || delay == 0)
651 time = ktime_set(0, 0);
652 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
653 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
656 sch->flags &= ~TCQ_F_THROTTLED;
657 __netif_schedule(sch);
658 return HRTIMER_NORESTART;
661 #ifdef CONFIG_NET_CLS_ACT
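/* Called when a packet could not be enqueued to the inner qdisc of a class
   policed with TC_POLICE_RECLASSIFY: reclassify the packet and try to
   enqueue it to the class it now maps to. */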
662 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
664 struct Qdisc *sch = child->__parent;
665 struct cbq_sched_data *q = qdisc_priv(sch);
666 struct cbq_class *cl = q->rx_class;
670 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
673 cbq_mark_toplevel(q, cl);
676 cl->q->__parent = sch;
678 ret = qdisc_enqueue(skb, cl->q);
679 if (ret == NET_XMIT_SUCCESS) {
681 sch->bstats.packets++;
682 sch->bstats.bytes += qdisc_pkt_len(skb);
684 cbq_activate_class(cl);
687 if (net_xmit_drop_count(ret))
This is a mission-critical procedure.

We "regenerate" the toplevel cutoff if the transmitting class
has backlog and is not regulated. This is not part of the
original CBQ description, but looks more reasonable.
Probably it is wrong; this question needs further investigation.
706 static __inline__ void
707 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
708 struct cbq_class *borrowed)
710 if (cl && q->toplevel >= borrowed->level) {
711 if (cl->q->q.qlen > 1) {
713 if (borrowed->undertime == PSCHED_PASTPERFECT) {
714 q->toplevel = borrowed->level;
717 } while ((borrowed=borrowed->borrow) != NULL);
720 /* It is not necessary now. Uncommenting it
721 will save CPU cycles, but decrease fairness.
723 q->toplevel = TC_CBQ_MAXLEVEL;
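/* Charge the packet that has just been sent (q->tx_len bytes) to the
   transmitting class and all of its link-sharing ancestors: update their
   byte/packet counters, avgidle estimators and undertime. */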
729 cbq_update(struct cbq_sched_data *q)
731 struct cbq_class *this = q->tx_class;
732 struct cbq_class *cl = this;
737 for ( ; cl; cl = cl->share) {
738 long avgidle = cl->avgidle;
741 cl->bstats.packets++;
742 cl->bstats.bytes += len;
745 (now - last) is total time between packet right edges.
746 (last_pktlen/rate) is "virtual" busy time, so that
748 idle = (now - last) - last_pktlen/rate
751 idle = q->now - cl->last;
752 if ((unsigned long)idle > 128*1024*1024) {
753 avgidle = cl->maxidle;
755 idle -= L2T(cl, len);
757 /* true_avgidle := (1-W)*true_avgidle + W*idle,
758 where W=2^{-ewma_log}. But cl->avgidle is scaled:
759 cl->avgidle == true_avgidle/W,
762 avgidle += idle - (avgidle>>cl->ewma_log);
766 /* Overlimit or at-limit */
768 if (avgidle < cl->minidle)
769 avgidle = cl->minidle;
771 cl->avgidle = avgidle;
/* Calculate the expected time when this class
774 will be allowed to send.
776 (1-W)*true_avgidle + W*delay = 0, i.e.
777 idle = (1/W - 1)*(-true_avgidle)
779 idle = (1 - W)*(-cl->avgidle);
781 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
To maintain the rate allocated to the class,
we add to undertime the virtual clock time
needed to complete the transmitted packet.
(len/phys_bandwidth has already elapsed
by the moment cbq_update is called.)
792 idle -= L2T(&q->link, len);
793 idle += L2T(cl, len);
795 cl->undertime = q->now + idle;
799 cl->undertime = PSCHED_PASTPERFECT;
800 if (avgidle > cl->maxidle)
801 cl->avgidle = cl->maxidle;
803 cl->avgidle = avgidle;
808 cbq_update_toplevel(q, this, q->tx_borrowed);
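/* Return cl itself if it may transmit now, otherwise the nearest ancestor it
   is allowed to borrow from; return NULL if the whole borrow chain is blocked
   (running the class's overlimit action when the chain is exhausted). */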
811 static __inline__ struct cbq_class *
812 cbq_under_limit(struct cbq_class *cl)
814 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
815 struct cbq_class *this_cl = cl;
817 if (cl->tparent == NULL)
820 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
/* This is a very suspicious place. Currently the overlimit
action is generated for non-bounded classes
only if the link is completely congested.
Though this agrees with the ancestor-only paradigm,
it looks very unwise. In particular,
it means that this chunk of code will either
never be called or result in strong amplification
of burstiness. Dangerous and silly, but
no other solution exists.
836 if ((cl = cl->borrow) == NULL) {
837 this_cl->qstats.overlimits++;
838 this_cl->overlimit(this_cl);
841 if (cl->level > q->toplevel)
843 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
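/* One weighted round-robin pass over the active classes of a priority band;
   each class may send up to its quantum per round, tracked in cl->deficit. */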
849 static __inline__ struct sk_buff *
850 cbq_dequeue_prio(struct Qdisc *sch, int prio)
852 struct cbq_sched_data *q = qdisc_priv(sch);
853 struct cbq_class *cl_tail, *cl_prev, *cl;
857 cl_tail = cl_prev = q->active[prio];
858 cl = cl_prev->next_alive;
865 struct cbq_class *borrow = cl;
868 (borrow = cbq_under_limit(cl)) == NULL)
871 if (cl->deficit <= 0) {
/* Class exhausted its allotment for
this round. Switch to the next one.
876 cl->deficit += cl->quantum;
880 skb = cl->q->dequeue(cl->q);
882 /* Class did not give us any skb :-(
This can occur even if cl->q->q.qlen != 0,
e.g. if cl->q == "tbf"
889 cl->deficit -= qdisc_pkt_len(skb);
891 q->tx_borrowed = borrow;
893 #ifndef CBQ_XSTATS_BORROWS_BYTES
894 borrow->xstats.borrows++;
895 cl->xstats.borrows++;
897 borrow->xstats.borrows += qdisc_pkt_len(skb);
898 cl->xstats.borrows += qdisc_pkt_len(skb);
901 q->tx_len = qdisc_pkt_len(skb);
903 if (cl->deficit <= 0) {
904 q->active[prio] = cl;
906 cl->deficit += cl->quantum;
911 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
912 /* Class is empty or penalized.
913 Unlink it from active chain.
915 cl_prev->next_alive = cl->next_alive;
916 cl->next_alive = NULL;
918 /* Did cl_tail point to it? */
923 /* Was it the last class in this band? */
926 q->active[prio] = NULL;
927 q->activemask &= ~(1<<prio);
929 cbq_activate_class(cl);
933 q->active[prio] = cl_tail;
936 cbq_activate_class(cl);
944 } while (cl_prev != cl_tail);
947 q->active[prio] = cl_prev;
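/* Try the priority bands with backlog in order, highest priority
   (lowest band number) first. */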
952 static __inline__ struct sk_buff *
953 cbq_dequeue_1(struct Qdisc *sch)
955 struct cbq_sched_data *q = qdisc_priv(sch);
959 activemask = q->activemask&0xFF;
961 int prio = ffz(~activemask);
962 activemask &= ~(1<<prio);
963 skb = cbq_dequeue_prio(sch, prio);
970 static struct sk_buff *
971 cbq_dequeue(struct Qdisc *sch)
974 struct cbq_sched_data *q = qdisc_priv(sch);
978 now = psched_get_time();
979 incr = now - q->now_rt;
982 psched_tdiff_t incr2;
983 /* Time integrator. We calculate EOS time
984 by adding expected packet transmission time.
If real time is greater, we warp the artificial clock,
so that cbq_time = max(real_time, work);
990 incr2 = L2T(&q->link, q->tx_len);
993 if ((incr -= incr2) < 0)
1002 skb = cbq_dequeue_1(sch);
1005 sch->flags &= ~TCQ_F_THROTTLED;
1009 /* All the classes are overlimit.
1013 1. Scheduler is empty.
1014 2. Toplevel cutoff inhibited borrowing.
1015 3. Root class is overlimit.
Reset the 2nd and 3rd conditions and retry.

Note that NS and cbq-2.0 are buggy: peeking
an arbitrary class is appropriate for ancestor-only
sharing, but not for the toplevel algorithm.

Our version is better but slower, because it requires
two passes; this is unavoidable with top-level sharing.
1027 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1028 q->link.undertime == PSCHED_PASTPERFECT)
1031 q->toplevel = TC_CBQ_MAXLEVEL;
1032 q->link.undertime = PSCHED_PASTPERFECT;
1035 /* No packets in scheduler or nobody wants to give them to us :-(
Sigh... start the watchdog timer in the latter case. */
1039 sch->qstats.overlimits++;
1041 qdisc_watchdog_schedule(&q->watchdog,
1042 now + q->wd_expires);
/* CBQ class maintenance routines */
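/* Recompute the level of each ancestor of `this' as one more than the
   maximum level among its children, walking up to the root. */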
1049 static void cbq_adjust_levels(struct cbq_class *this)
1056 struct cbq_class *cl;
1058 if ((cl = this->children) != NULL) {
1060 if (cl->level > level)
1062 } while ((cl = cl->sibling) != this->children);
1064 this->level = level+1;
1065 } while ((this = this->tparent) != NULL);
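/* Recompute the WRR quantum of every class in the given priority band from
   its weight and allot, keeping quanta proportional to weights. */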
1068 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1070 struct cbq_class *cl;
1071 struct hlist_node *n;
1074 if (q->quanta[prio] == 0)
1077 for (h = 0; h < q->clhash.hashsize; h++) {
1078 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
/* BUGGGG... Beware! This expression suffers from
arithmetic overflows!
1082 if (cl->priority == prio) {
1083 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1086 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1087 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1088 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
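/* Rebuild the default-class table of cl's split node after cl's defmap has
   changed: clear stale entries pointing at cl, then refill each freed
   priority slot with the best matching class. */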
1094 static void cbq_sync_defmap(struct cbq_class *cl)
1096 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1097 struct cbq_class *split = cl->split;
1104 for (i=0; i<=TC_PRIO_MAX; i++) {
1105 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1106 split->defaults[i] = NULL;
1109 for (i=0; i<=TC_PRIO_MAX; i++) {
1110 int level = split->level;
1112 if (split->defaults[i])
1115 for (h = 0; h < q->clhash.hashsize; h++) {
1116 struct hlist_node *n;
1117 struct cbq_class *c;
1119 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1121 if (c->split == split && c->level < level &&
1123 split->defaults[i] = c;
1131 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1133 struct cbq_class *split = NULL;
1136 if ((split = cl->split) == NULL)
1138 splitid = split->common.classid;
1141 if (split == NULL || split->common.classid != splitid) {
1142 for (split = cl->tparent; split; split = split->tparent)
1143 if (split->common.classid == splitid)
1150 if (cl->split != split) {
1152 cbq_sync_defmap(cl);
1154 cl->defmap = def&mask;
1156 cl->defmap = (cl->defmap&~mask)|(def&mask);
1158 cbq_sync_defmap(cl);
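/* Remove the class from the class hash and from its parent's sibling ring. */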
1161 static void cbq_unlink_class(struct cbq_class *this)
1163 struct cbq_class *cl, **clp;
1164 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1166 qdisc_class_hash_remove(&q->clhash, &this->common);
1168 if (this->tparent) {
1177 } while ((cl = *clp) != this->sibling);
1179 if (this->tparent->children == this) {
1180 this->tparent->children = this->sibling;
1181 if (this->sibling == this)
1182 this->tparent->children = NULL;
1185 WARN_ON(this->sibling != this);
1189 static void cbq_link_class(struct cbq_class *this)
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1192 struct cbq_class *parent = this->tparent;
1194 this->sibling = this;
1195 qdisc_class_hash_insert(&q->clhash, &this->common);
1200 if (parent->children == NULL) {
1201 parent->children = this;
1203 this->sibling = parent->children->sibling;
1204 parent->children->sibling = this;
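/* Drop one packet from an active class, starting with the lowest-priority
   band; deactivate the class if its queue becomes empty. */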
1208 static unsigned int cbq_drop(struct Qdisc* sch)
1210 struct cbq_sched_data *q = qdisc_priv(sch);
1211 struct cbq_class *cl, *cl_head;
1215 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1216 if ((cl_head = q->active[prio]) == NULL)
1221 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1224 cbq_deactivate_class(cl);
1227 } while ((cl = cl->next_alive) != cl_head);
1233 cbq_reset(struct Qdisc* sch)
1235 struct cbq_sched_data *q = qdisc_priv(sch);
1236 struct cbq_class *cl;
1237 struct hlist_node *n;
1244 q->tx_borrowed = NULL;
1245 qdisc_watchdog_cancel(&q->watchdog);
1246 hrtimer_cancel(&q->delay_timer);
1247 q->toplevel = TC_CBQ_MAXLEVEL;
1248 q->now = psched_get_time();
1251 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1252 q->active[prio] = NULL;
1254 for (h = 0; h < q->clhash.hashsize; h++) {
1255 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1258 cl->next_alive = NULL;
1259 cl->undertime = PSCHED_PASTPERFECT;
1260 cl->avgidle = cl->maxidle;
1261 cl->deficit = cl->quantum;
1262 cl->cpriority = cl->priority;
1269 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1271 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1272 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1273 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1275 if (lss->change&TCF_CBQ_LSS_EWMA)
1276 cl->ewma_log = lss->ewma_log;
1277 if (lss->change&TCF_CBQ_LSS_AVPKT)
1278 cl->avpkt = lss->avpkt;
1279 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1280 cl->minidle = -(long)lss->minidle;
1281 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1282 cl->maxidle = lss->maxidle;
1283 cl->avgidle = lss->maxidle;
1285 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1286 cl->offtime = lss->offtime;
1290 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1292 q->nclasses[cl->priority]--;
1293 q->quanta[cl->priority] -= cl->weight;
1294 cbq_normalize_quanta(q, cl->priority);
1297 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1299 q->nclasses[cl->priority]++;
1300 q->quanta[cl->priority] += cl->weight;
1301 cbq_normalize_quanta(q, cl->priority);
1304 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1306 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1309 cl->allot = wrr->allot;
1311 cl->weight = wrr->weight;
1312 if (wrr->priority) {
1313 cl->priority = wrr->priority-1;
1314 cl->cpriority = cl->priority;
1315 if (cl->priority >= cl->priority2)
1316 cl->priority2 = TC_CBQ_MAXPRIO-1;
1323 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1325 switch (ovl->strategy) {
1326 case TC_CBQ_OVL_CLASSIC:
1327 cl->overlimit = cbq_ovl_classic;
1329 case TC_CBQ_OVL_DELAY:
1330 cl->overlimit = cbq_ovl_delay;
1332 case TC_CBQ_OVL_LOWPRIO:
1333 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1334 ovl->priority2-1 <= cl->priority)
1336 cl->priority2 = ovl->priority2-1;
1337 cl->overlimit = cbq_ovl_lowprio;
1339 case TC_CBQ_OVL_DROP:
1340 cl->overlimit = cbq_ovl_drop;
1342 case TC_CBQ_OVL_RCLASSIC:
1343 cl->overlimit = cbq_ovl_rclassic;
1348 cl->penalty = ovl->penalty;
1352 #ifdef CONFIG_NET_CLS_ACT
1353 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1355 cl->police = p->police;
1357 if (cl->q->handle) {
1358 if (p->police == TC_POLICE_RECLASSIFY)
1359 cl->q->reshape_fail = cbq_reshape_fail;
1361 cl->q->reshape_fail = NULL;
1367 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1369 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1373 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1374 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1375 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1376 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1377 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1378 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1379 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1380 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1383 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1385 struct cbq_sched_data *q = qdisc_priv(sch);
1386 struct nlattr *tb[TCA_CBQ_MAX + 1];
1387 struct tc_ratespec *r;
1390 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1394 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1397 r = nla_data(tb[TCA_CBQ_RATE]);
1399 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1402 err = qdisc_class_hash_init(&q->clhash);
1407 q->link.sibling = &q->link;
1408 q->link.common.classid = sch->handle;
1409 q->link.qdisc = sch;
1410 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1413 q->link.q = &noop_qdisc;
1415 q->link.priority = TC_CBQ_MAXPRIO-1;
1416 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1417 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1418 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1419 q->link.overlimit = cbq_ovl_classic;
1420 q->link.allot = psched_mtu(qdisc_dev(sch));
1421 q->link.quantum = q->link.allot;
1422 q->link.weight = q->link.R_tab->rate.rate;
1424 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1425 q->link.avpkt = q->link.allot/2;
1426 q->link.minidle = -0x7FFFFFFF;
1428 qdisc_watchdog_init(&q->watchdog, sch);
1429 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1430 q->delay_timer.function = cbq_undelay;
1431 q->toplevel = TC_CBQ_MAXLEVEL;
1432 q->now = psched_get_time();
1435 cbq_link_class(&q->link);
1437 if (tb[TCA_CBQ_LSSOPT])
1438 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1440 cbq_addprio(q, &q->link);
1444 qdisc_put_rtab(q->link.R_tab);
1448 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1450 unsigned char *b = skb_tail_pointer(skb);
1452 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1460 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1462 unsigned char *b = skb_tail_pointer(skb);
1463 struct tc_cbq_lssopt opt;
1466 if (cl->borrow == NULL)
1467 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468 if (cl->share == NULL)
1469 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470 opt.ewma_log = cl->ewma_log;
1471 opt.level = cl->level;
1472 opt.avpkt = cl->avpkt;
1473 opt.maxidle = cl->maxidle;
1474 opt.minidle = (u32)(-cl->minidle);
1475 opt.offtime = cl->offtime;
1477 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1485 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1487 unsigned char *b = skb_tail_pointer(skb);
1488 struct tc_cbq_wrropt opt;
1491 opt.allot = cl->allot;
1492 opt.priority = cl->priority+1;
1493 opt.cpriority = cl->cpriority+1;
1494 opt.weight = cl->weight;
1495 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1503 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1505 unsigned char *b = skb_tail_pointer(skb);
1506 struct tc_cbq_ovl opt;
1508 opt.strategy = cl->ovl_strategy;
1509 opt.priority2 = cl->priority2+1;
1511 opt.penalty = cl->penalty;
1512 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1520 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_fopt opt;
1525 if (cl->split || cl->defmap) {
1526 opt.split = cl->split ? cl->split->common.classid : 0;
1527 opt.defmap = cl->defmap;
1529 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1538 #ifdef CONFIG_NET_CLS_ACT
1539 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1541 unsigned char *b = skb_tail_pointer(skb);
1542 struct tc_cbq_police opt;
1545 opt.police = cl->police;
1548 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1558 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1560 if (cbq_dump_lss(skb, cl) < 0 ||
1561 cbq_dump_rate(skb, cl) < 0 ||
1562 cbq_dump_wrr(skb, cl) < 0 ||
1563 cbq_dump_ovl(skb, cl) < 0 ||
1564 #ifdef CONFIG_NET_CLS_ACT
1565 cbq_dump_police(skb, cl) < 0 ||
1567 cbq_dump_fopt(skb, cl) < 0)
1572 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1574 struct cbq_sched_data *q = qdisc_priv(sch);
1575 struct nlattr *nest;
1577 nest = nla_nest_start(skb, TCA_OPTIONS);
1579 goto nla_put_failure;
1580 if (cbq_dump_attr(skb, &q->link) < 0)
1581 goto nla_put_failure;
1582 nla_nest_end(skb, nest);
1586 nla_nest_cancel(skb, nest);
1591 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1593 struct cbq_sched_data *q = qdisc_priv(sch);
1595 q->link.xstats.avgidle = q->link.avgidle;
1596 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1600 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601 struct sk_buff *skb, struct tcmsg *tcm)
1603 struct cbq_class *cl = (struct cbq_class*)arg;
1604 struct nlattr *nest;
1607 tcm->tcm_parent = cl->tparent->common.classid;
1609 tcm->tcm_parent = TC_H_ROOT;
1610 tcm->tcm_handle = cl->common.classid;
1611 tcm->tcm_info = cl->q->handle;
1613 nest = nla_nest_start(skb, TCA_OPTIONS);
1615 goto nla_put_failure;
1616 if (cbq_dump_attr(skb, cl) < 0)
1617 goto nla_put_failure;
1618 nla_nest_end(skb, nest);
1622 nla_nest_cancel(skb, nest);
1627 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628 struct gnet_dump *d)
1630 struct cbq_sched_data *q = qdisc_priv(sch);
1631 struct cbq_class *cl = (struct cbq_class*)arg;
1633 cl->qstats.qlen = cl->q->q.qlen;
1634 cl->xstats.avgidle = cl->avgidle;
1635 cl->xstats.undertime = 0;
1637 if (cl->undertime != PSCHED_PASTPERFECT)
1638 cl->xstats.undertime = cl->undertime - q->now;
1640 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1641 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1645 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1648 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1655 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1657 cl->common.classid);
1661 #ifdef CONFIG_NET_CLS_ACT
1662 if (cl->police == TC_POLICE_RECLASSIFY)
1663 new->reshape_fail = cbq_reshape_fail;
1667 *old = xchg(&cl->q, new);
1668 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1670 sch_tree_unlock(sch);
1677 static struct Qdisc *
1678 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1680 struct cbq_class *cl = (struct cbq_class*)arg;
1682 return cl ? cl->q : NULL;
1685 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1687 struct cbq_class *cl = (struct cbq_class *)arg;
1689 if (cl->q->q.qlen == 0)
1690 cbq_deactivate_class(cl);
1693 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1695 struct cbq_sched_data *q = qdisc_priv(sch);
1696 struct cbq_class *cl = cbq_class_lookup(q, classid);
1700 return (unsigned long)cl;
1705 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1707 struct cbq_sched_data *q = qdisc_priv(sch);
1709 WARN_ON(cl->filters);
1711 tcf_destroy_chain(&cl->filter_list);
1712 qdisc_destroy(cl->q);
1713 qdisc_put_rtab(cl->R_tab);
1714 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1720 cbq_destroy(struct Qdisc* sch)
1722 struct cbq_sched_data *q = qdisc_priv(sch);
1723 struct hlist_node *n, *next;
1724 struct cbq_class *cl;
1727 #ifdef CONFIG_NET_CLS_ACT
1731 * Filters must be destroyed first because we don't destroy the
* classes from root to leaves, which means that filters can still
1733 * be bound to classes which have been destroyed already. --TGR '04
1735 for (h = 0; h < q->clhash.hashsize; h++) {
1736 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1737 tcf_destroy_chain(&cl->filter_list);
1739 for (h = 0; h < q->clhash.hashsize; h++) {
1740 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1742 cbq_destroy_class(sch, cl);
1744 qdisc_class_hash_destroy(&q->clhash);
1747 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1749 struct cbq_class *cl = (struct cbq_class*)arg;
1751 if (--cl->refcnt == 0) {
1752 #ifdef CONFIG_NET_CLS_ACT
1753 spinlock_t *root_lock = qdisc_root_lock(sch);
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1756 spin_lock_bh(root_lock);
1757 if (q->rx_class == cl)
1759 spin_unlock_bh(root_lock);
1762 cbq_destroy_class(sch, cl);
1767 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl = (struct cbq_class*)*arg;
1773 struct nlattr *opt = tca[TCA_OPTIONS];
1774 struct nlattr *tb[TCA_CBQ_MAX + 1];
1775 struct cbq_class *parent;
1776 struct qdisc_rate_table *rtab = NULL;
1781 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1789 cl->tparent->common.classid != parentid)
1791 if (!cl->tparent && parentid != TC_H_ROOT)
1795 if (tb[TCA_CBQ_RATE]) {
1796 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1801 /* Change class parameters */
1804 if (cl->next_alive != NULL)
1805 cbq_deactivate_class(cl);
1808 rtab = xchg(&cl->R_tab, rtab);
1809 qdisc_put_rtab(rtab);
1812 if (tb[TCA_CBQ_LSSOPT])
1813 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1815 if (tb[TCA_CBQ_WRROPT]) {
1817 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1820 if (tb[TCA_CBQ_OVL_STRATEGY])
1821 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1823 #ifdef CONFIG_NET_CLS_ACT
1824 if (tb[TCA_CBQ_POLICE])
1825 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1828 if (tb[TCA_CBQ_FOPT])
1829 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1832 cbq_activate_class(cl);
1834 sch_tree_unlock(sch);
1837 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1838 qdisc_root_lock(sch),
1843 if (parentid == TC_H_ROOT)
1846 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1847 tb[TCA_CBQ_LSSOPT] == NULL)
1850 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1856 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1860 classid = TC_H_MAKE(sch->handle,0x8000);
1862 for (i=0; i<0x8000; i++) {
1863 if (++q->hgenerator >= 0x8000)
1865 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1871 classid = classid|q->hgenerator;
1876 parent = cbq_class_lookup(q, parentid);
1883 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1889 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1890 &pfifo_qdisc_ops, classid)))
1891 cl->q = &noop_qdisc;
1892 cl->common.classid = classid;
1893 cl->tparent = parent;
1895 cl->allot = parent->allot;
1896 cl->quantum = cl->allot;
1897 cl->weight = cl->R_tab->rate.rate;
1901 cl->borrow = cl->tparent;
1902 if (cl->tparent != &q->link)
1903 cl->share = cl->tparent;
1904 cbq_adjust_levels(parent);
1905 cl->minidle = -0x7FFFFFFF;
1906 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1907 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1908 if (cl->ewma_log==0)
1909 cl->ewma_log = q->link.ewma_log;
1911 cl->maxidle = q->link.maxidle;
1913 cl->avpkt = q->link.avpkt;
1914 cl->overlimit = cbq_ovl_classic;
1915 if (tb[TCA_CBQ_OVL_STRATEGY])
1916 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1917 #ifdef CONFIG_NET_CLS_ACT
1918 if (tb[TCA_CBQ_POLICE])
1919 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1921 if (tb[TCA_CBQ_FOPT])
1922 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1923 sch_tree_unlock(sch);
1925 qdisc_class_hash_grow(sch, &q->clhash);
1928 gen_new_estimator(&cl->bstats, &cl->rate_est,
1929 qdisc_root_lock(sch), tca[TCA_RATE]);
1931 *arg = (unsigned long)cl;
1935 qdisc_put_rtab(rtab);
1939 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1941 struct cbq_sched_data *q = qdisc_priv(sch);
1942 struct cbq_class *cl = (struct cbq_class*)arg;
1945 if (cl->filters || cl->children || cl == &q->link)
1950 qlen = cl->q->q.qlen;
1952 qdisc_tree_decrease_qlen(cl->q, qlen);
1955 cbq_deactivate_class(cl);
1957 if (q->tx_borrowed == cl)
1958 q->tx_borrowed = q->tx_class;
1959 if (q->tx_class == cl) {
1961 q->tx_borrowed = NULL;
1963 #ifdef CONFIG_NET_CLS_ACT
1964 if (q->rx_class == cl)
1968 cbq_unlink_class(cl);
1969 cbq_adjust_levels(cl->tparent);
1971 cbq_sync_defmap(cl);
1974 sch_tree_unlock(sch);
1976 if (--cl->refcnt == 0)
1977 cbq_destroy_class(sch, cl);
1982 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1984 struct cbq_sched_data *q = qdisc_priv(sch);
1985 struct cbq_class *cl = (struct cbq_class *)arg;
1990 return &cl->filter_list;
1993 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1996 struct cbq_sched_data *q = qdisc_priv(sch);
1997 struct cbq_class *p = (struct cbq_class*)parent;
1998 struct cbq_class *cl = cbq_class_lookup(q, classid);
2001 if (p && p->level <= cl->level)
2004 return (unsigned long)cl;
2009 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2011 struct cbq_class *cl = (struct cbq_class*)arg;
2016 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2018 struct cbq_sched_data *q = qdisc_priv(sch);
2019 struct cbq_class *cl;
2020 struct hlist_node *n;
2026 for (h = 0; h < q->clhash.hashsize; h++) {
2027 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2028 if (arg->count < arg->skip) {
2032 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2041 static const struct Qdisc_class_ops cbq_class_ops = {
2044 .qlen_notify = cbq_qlen_notify,
2047 .change = cbq_change_class,
2048 .delete = cbq_delete,
2050 .tcf_chain = cbq_find_tcf,
2051 .bind_tcf = cbq_bind_filter,
2052 .unbind_tcf = cbq_unbind_filter,
2053 .dump = cbq_dump_class,
2054 .dump_stats = cbq_dump_class_stats,
2057 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2059 .cl_ops = &cbq_class_ops,
2061 .priv_size = sizeof(struct cbq_sched_data),
2062 .enqueue = cbq_enqueue,
2063 .dequeue = cbq_dequeue,
2064 .requeue = cbq_requeue,
2068 .destroy = cbq_destroy,
2071 .dump_stats = cbq_dump_stats,
2072 .owner = THIS_MODULE,
2075 static int __init cbq_module_init(void)
2077 return register_qdisc(&cbq_qdisc_ops);
2079 static void __exit cbq_module_exit(void)
2081 unregister_qdisc(&cbq_qdisc_ops);
2083 module_init(cbq_module_init)
2084 module_exit(cbq_module_exit)
2085 MODULE_LICENSE("GPL");