/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/bitops.h>

#include <net/sock.h>
#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to make some sanity
   checks and do the part of the work which is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but this does not mean that the queue is empty, only that the
   discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by policing.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a once-dequeued packet. It is used for non-standard or
   just plain buggy devices, which can defer output even if dev->tbusy=0.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
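/*
 * To make the contract above concrete, here is a minimal sketch (not part
 * of this file, hence compiled out) of how a trivial FIFO-style qdisc
 * could implement enqueue/dequeue; the "example_" names are hypothetical.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) {
		__skb_queue_tail(&sch->q, skb);	/* keeps q->q.qlen valid */
		sch->qstats.backlog += skb->len;
		return NET_XMIT_SUCCESS;	/* 0: enqueued successfully */
	}
	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;			/* this very packet was dropped */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL here need not mean "empty"; q->q.qlen is authoritative. */
	return __skb_dequeue(&sch->q);
}
#endif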
/* Protects list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
173 int unregister_qdisc(struct Qdisc_ops *qops)
175 struct Qdisc_ops *q, **qp;
178 write_lock(&qdisc_mod_lock);
179 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
187 write_unlock(&qdisc_mod_lock);
/* We know handle. Find qdisc among all qdiscs attached to the device
   (root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	read_lock_bh(&qdisc_tree_lock);
	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (q->handle == handle) {
			read_unlock_bh(&qdisc_tree_lock);
			return q;
		}
	}
	read_unlock_bh(&qdisc_tree_lock);
	return NULL;
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}
/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (rtattr_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, RTA_DATA(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
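/*
 * For illustration (a sketch, not used by the code above): a qdisc handle
 * packs a 16-bit major and a 16-bit minor number into a u32, so the handle
 * "1:" familiar from tc(8) is TC_H_MAKE(0x10000, 0), i.e. 0x00010000, and
 * class "1:10" is 0x00010010 (tc minors are hexadecimal):
 *
 *	u32 h = TC_H_MAKE(0x10000, 0x10);
 *	TC_H_MAJ(h);	- 0x00010000, the owning qdisc
 *	TC_H_MIN(h);	- 0x00000010, the class within it
 */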
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	qdisc_lock_tree(dev);
	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
		oqdisc = dev->qdisc_ingress;
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev->qdisc_ingress = NULL;
		} else {  /* new */
			dev->qdisc_ingress = qdisc;
		}

	} else {

		oqdisc = dev->qdisc_sleeping;

		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev->qdisc_sleeping = qdisc;
		dev->qdisc = &noop_qdisc;
	}

	qdisc_unlock_tree(dev);

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   Old qdisc is not destroyed but returned in *old.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       u32 classid,
		       struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;
	struct Qdisc *q = *old;

	if (parent == NULL) {
		if (q && q->flags&TCQ_F_INGRESS) {
			*old = dev_graft_qdisc(dev, q);
		} else {
			*old = dev_graft_qdisc(dev, new);
		}
	} else {
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, old);
				if (new)
					new->parent = classid;
				cops->put(parent, cl);
			}
		}
	}
	return err;
}
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
	int err;
	struct rtattr *kind = tca[TCA_KIND-1];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -EINVAL;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else if (handle == 0) {
		handle = qdisc_alloc_handle(dev);
		err = -ENOMEM;
		if (handle == 0)
			goto err_out3;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		qdisc_lock_tree(dev);
		list_add_tail(&sch->list, &dev->qdisc_list);
		qdisc_unlock_tree(dev);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_new_estimator(&sch->bstats, &sch->rate_est,
					  sch->stats_lock, tca[TCA_RATE-1]);
#endif
		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
	if (tca[TCA_OPTIONS-1]) {
		int err;

		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
		if (err)
			return err;
	}
#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      sch->stats_lock, tca[TCA_RATE-1]);
#endif
	return 0;
}
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
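/*
 * Illustrative note (not in the original source): if a request would graft
 * qdisc q underneath a class of qdisc p while p is already a descendant of
 * q, the caller invokes check_loop(q, p, 0); the walk recurses through q's
 * classes and their leaf qdiscs and stops with -ELOOP when it meets p
 * itself or exceeds a nesting depth of 7.
 */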
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(q);
			spin_unlock_bh(&dev->queue_lock);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm;
	struct rtattr **tca;
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	tca = arg;
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted to say
				 *   that qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of a hole in the API, we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the existing one.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	if (1) {
		struct Qdisc *old_q = NULL;
		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q) {
				spin_lock_bh(&dev->queue_lock);
				qdisc_destroy(q);
				spin_unlock_bh(&dev->queue_lock);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q) {
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(old_q);
			spin_unlock_bh(&dev->queue_lock);
		}
	}
	return 0;
}
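/*
 * For reference, a minimal userspace sketch (illustrative only, compiled
 * out; it does not belong in the kernel) of the kind of request the flag
 * rules above apply to: create-or-replace the root qdisc of an interface
 * with a pfifo. Error handling and the ifindex lookup are elided.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>

static void add_root_pfifo(int ifindex)
{
	struct {
		struct nlmsghdr n;
		struct tcmsg t;
		char buf[256];
	} req;
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct rtattr *kind;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
	req.n.nlmsg_type = RTM_NEWQDISC;
	/* create if absent, replace if present: case 1 of the comment above */
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
	req.t.tcm_family = AF_UNSPEC;
	req.t.tcm_ifindex = ifindex;
	req.t.tcm_parent = TC_H_ROOT;
	req.t.tcm_handle = TC_H_MAKE(0x10000, 0);	/* "1:" */

	/* TCA_KIND names the discipline; unknown kinds make the kernel
	 * request_module("sch_<kind>") and replay with -EAGAIN. */
	kind = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.n.nlmsg_len));
	kind->rta_type = TCA_KIND;
	kind->rta_len = RTA_LENGTH(sizeof("pfifo"));
	memcpy(RTA_DATA(kind), "pfifo", sizeof("pfifo"));
	req.n.nlmsg_len = NLMSG_ALIGN(req.n.nlmsg_len) + RTA_ALIGN(kind->rta_len);

	sendto(fd, &req, req.n.nlmsg_len, 0,
	       (struct sockaddr *)&nladdr, sizeof(nladdr));
}
#endif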
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;
	q->qstats.qlen = q->q.qlen;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
			TCA_XSTATS, q->stats_lock, &d) < 0)
		goto rtattr_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto rtattr_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
#endif
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
		if (idx < s_idx)
			continue;
		if (idx > s_idx)
			s_q_idx = 0;
		read_lock_bh(&qdisc_tree_lock);
		q_idx = 0;
		list_for_each_entry(q, &dev->qdisc_list, list) {
			if (q_idx < s_q_idx) {
				q_idx++;
				continue;
			}
			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
				read_unlock_bh(&qdisc_tree_lock);
				goto done;
			}
			q_idx++;
		}
		read_unlock_bh(&qdisc_tree_lock);
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
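	/*
	 * Illustration (not from the original source): for "parent 1:
	 * classid 1:10" the request carries pid == 0x10000 and
	 * clid == 0x10010, so qid == TC_H_MAJ(clid) == 0x10000 and it must
	 * agree with TC_H_MAJ(pid). For "handle 0:Y" requests qid is 0 and
	 * is completed below, from the parent or from
	 * dev->qdisc_sleeping->handle, before the class lookup.
	 */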
	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct gnet_dump d;
	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto rtattr_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
			TCA_XSTATS, q->stats_lock, &d) < 0)
		goto rtattr_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
{
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	read_lock_bh(&qdisc_tree_lock);
	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (t < s_t || !q->ops->cl_ops ||
		    (tcm->tcm_parent &&
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		}
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
		t++;
	}
	read_unlock_bh(&qdisc_tree_lock);

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for protocol and asks
   specific classifiers.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	u32 protocol = skb->protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	for ( ; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == __constant_htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (TC_ACT_RECLASSIFY == err) {
				__u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
				tp = otp;

				if (MAX_REC_LOOP < verd++) {
					printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
					       tp->prio&0xffff, ntohs(tp->protocol));
					return TC_ACT_SHOT;
				}
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
				goto reclassify;
			} else {
				if (skb->tc_verd)
					skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
				return err;
			}
#else
			return err;
#endif
		}
	}
	return -1;
}
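/*
 * A hypothetical caller, for illustration only (compiled out, error
 * handling elided): a classful qdisc typically resolves the target class
 * for an incoming packet by running its filter chain through tc_classify.
 */
#if 0
static u32 example_classify(struct sk_buff *skb, struct tcf_proto *filter_list)
{
	struct tcf_result res;

	/* filter_list is the qdisc's own chain of struct tcf_proto */
	if (tc_classify(skb, filter_list, &res) >= 0)
		return res.classid;	/* class chosen by the filters */
	return 0;			/* fall back to a default class */
}
#endif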
static int psched_us_per_tick = 1;
static int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   psched_tick_per_us, psched_us_per_tick,
		   1000000, HZ);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
#ifdef CONFIG_NET_SCH_CLK_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
EXPORT_SYMBOL(psched_clock_per_hz);
EXPORT_SYMBOL(psched_clock_scale);

psched_time_t psched_time_base;
cycles_t psched_time_mark;
EXPORT_SYMBOL(psched_time_mark);
EXPORT_SYMBOL(psched_time_base);

/*
 * Periodically adjust psched_time_base to avoid overflow
 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
 */
static void psched_tick(unsigned long);
static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);

static void psched_tick(unsigned long dummy)
{
	if (sizeof(cycles_t) == sizeof(u32)) {
		psched_time_t dummy_stamp;
		PSCHED_GET_TIME(dummy_stamp);
		psched_timer.expires = jiffies + 1*HZ;
		add_timer(&psched_timer);
	}
}
int __init psched_calibrate_clock(void)
{
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;
	long rdelay;
	unsigned long stop;

	psched_tick(0);
	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop)) {
		barrier();
		cpu_relax();
	}
	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
	if (rdelay > delay)
		return -1;
	delay /= rdelay;
	psched_tick_per_us = delay;
	while ((delay >>= 1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
	return 0;
}
#endif
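/*
 * Worked example of the calibration above (hypothetical numbers): on a
 * 1 GHz TSC, the 100 ms busy loop sees delay ~= 100,000,000 cycles over
 * rdelay ~= 100,000 us, so psched_tick_per_us ~= 1000. Halving 1000 down
 * to 0 takes 9 shifts, so psched_clock_scale = 9 and psched_us_per_tick
 * = 512; with HZ = 1000, psched_clock_per_hz ~= (1000 * 1000) >> 9 ~= 1953
 * clock ticks per jiffy.
 */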
static int __init pktsched_init(void)
{
	struct rtnetlink_link *link_p;

#ifdef CONFIG_NET_SCH_CLK_CPU
	if (psched_calibrate_clock() < 0)
		return -1;
#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#endif

	link_p = rtnetlink_links[PF_UNSPEC];

	/* Setup rtnetlink links. It is made here to avoid
	   exporting a large number of public symbols.
	 */

	if (link_p) {
		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
	}

	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create("psched", 0, &psched_fops);

	return 0;
}

subsys_initcall(pktsched_init);
EXPORT_SYMBOL(qdisc_lookup);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
EXPORT_SYMBOL(tc_classify);