From 3072367300aa8c779e3a14ee8e89de079e90f3ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 22:50:15 -0700 Subject: [PATCH] pkt_sched: Manage qdisc list inside of root qdisc. Idea is from Patrick McHardy. Instead of managing the list of qdiscs on the device level, manage it in the root qdisc of a netdev_queue. This solves all kinds of visibility issues during qdisc destruction. The way to iterate over all qdiscs of a netdev_queue is to visit the netdev_queue->qdisc, and then traverse it's list. The only special case is to ignore builting qdiscs at the root when dumping or doing a qdisc_lookup(). That was not needed previously because builtin qdiscs were not added to the device's qdisc_list. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 - net/core/dev.c | 2 - net/sched/sch_api.c | 175 ++++++++++++++++++++++++++++---------- net/sched/sch_generic.c | 10 +-- 4 files changed, 133 insertions(+), 56 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c5a688501..812bcd8b43 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -636,8 +636,6 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ - spinlock_t qdisc_list_lock; - struct list_head qdisc_list; /* * One part is mostly used on xmit path (device) diff --git a/net/core/dev.c b/net/core/dev.c index e54acde839..065b9817e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev) net = dev_net(dev); spin_lock_init(&dev->addr_list_lock); - spin_lock_init(&dev->qdisc_list_lock); - INIT_LIST_HEAD(&dev->qdisc_list); netdev_init_queue_locks(dev); dev->iflink = -1; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b3ef830720..fb43731c98 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - struct Qdisc *q; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q, *txq_root = txq->qdisc; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) - return q; + if (!(txq_root->flags & TCQ_F_BUILTIN) && + txq_root->handle == handle) + return txq_root; + + list_for_each_entry(q, &txq_root->list, list) { + if (q->handle == handle) + return q; + } } return NULL; } @@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - spin_lock_bh(&dev->qdisc_list_lock); - list_add_tail(&sch->list, &dev->qdisc_list); - spin_unlock_bh(&dev->qdisc_list_lock); + if (parent) + list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; } @@ -1037,13 +1045,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct netdev_queue *dev_queue; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; +done: cb->args[0] = t; dev_put(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e244c462e6..14cc443d04 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - struct net_device *dev = qdisc_dev(qdisc); - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; - spin_lock_bh(&dev->qdisc_list_lock); - list_del(&qdisc->list); - spin_unlock_bh(&dev->qdisc_list_lock); + if (qdisc->parent) + list_del(&qdisc->list); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - spin_lock_bh(&dev->qdisc_list_lock); - list_add_tail(&qdisc->list, &dev->qdisc_list); - spin_unlock_bh(&dev->qdisc_list_lock); } else { qdisc = &noqueue_qdisc; } -- 2.39.5