X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fsched%2Fsch_api.c;h=4840aff4725603295d4eef1ebca7f24cd5f9a853;hb=11d46123bfea068a48483f00518d301f452647fb;hp=95873f8dd37cc206155146810945c30e96ecfacf;hpb=2aec609fb45e84d65bc8eabc7b650bbecb1cc179;p=linux-2.6 diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 95873f8dd3..4840aff472 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -185,12 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - struct netdev_queue *dev_queue = &dev->tx_queue; - struct Qdisc *q; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q, *txq_root = txq->qdisc; + + if (!(txq_root->flags & TCQ_F_BUILTIN) && + txq_root->handle == handle) + return txq_root; - list_for_each_entry(q, &dev_queue->qdisc_list, list) { - if (q->handle == handle) - return q; + list_for_each_entry(q, &txq_root->list, list) { + if (q->handle == handle) + return q; + } } return NULL; } @@ -278,15 +286,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } EXPORT_SYMBOL(qdisc_put_rtab); +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct netdev_queue *txq = wd->qdisc->dev_queue; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule_queue(txq); + __netif_schedule(wd->qdisc); return HRTIMER_NORESTART; } @@ -317,7 +447,7 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); -struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { unsigned int size = n * sizeof(struct hlist_head), i; struct hlist_head *h; @@ -437,49 +567,28 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device dev */ +/* Attach toplevel qdisc to device queue. */ -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) { - struct netdev_queue *dev_queue; - struct Qdisc *oqdisc; + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + spinlock_t *root_lock; - if (dev->flags & IFF_UP) - dev_deactivate(dev); + root_lock = qdisc_root_lock(oqdisc); + spin_lock_bh(root_lock); - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - dev_queue = &dev->rx_queue; - oqdisc = dev_queue->qdisc; - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { - /* delete */ - qdisc_reset(oqdisc); - dev_queue->qdisc = NULL; - } else { /* new */ - dev_queue->qdisc = qdisc; - } + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); - } else { - dev_queue = &dev->tx_queue; - oqdisc = dev_queue->qdisc_sleeping; - - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; - } - - qdisc_unlock_tree(dev); + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; - if (dev->flags & IFF_UP) - dev_activate(dev); + spin_unlock_bh(root_lock); return oqdisc; } @@ -512,26 +621,64 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(skb, n, clid, old, new); - Old qdisc is not destroyed but returned in *old. + if (old) { + spin_lock_bh(&old->q.lock); + qdisc_destroy(old); + spin_unlock_bh(&old->q.lock); + } +} + +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); - } else { - *old = dev_graft_qdisc(dev, new); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if ((q && q->flags & TCQ_F_INGRESS) || + (new && new->flags & TCQ_F_INGRESS)) { + num_q = 1; + ingress = 1; + } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + + notify_and_destroy(skb, n, classid, old, new); } + + if (dev->flags & IFF_UP) + dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; @@ -540,10 +687,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (cops) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); } } + if (!err) + notify_and_destroy(skb, n, classid, old, new); } return err; } @@ -562,6 +711,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_KMOD @@ -619,9 +769,17 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out3; + } + sch->stab = stab; + } if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, - &sch->dev_queue->lock, + qdisc_root_lock(sch), tca[TCA_RATE]); if (err) { /* @@ -634,13 +792,13 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev_queue->qdisc_list); - qdisc_unlock_tree(dev); + if (parent && !(sch->flags & TCQ_F_INGRESS)) + list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; } err_out3: + qdisc_put_stab(sch->stab); dev_put(dev); kfree((char *) sch - sch->padded); err_out2: @@ -652,18 +810,29 @@ err_out: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - &sch->dev_queue->lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -742,7 +911,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->rx_queue.qdisc; } } else { - struct netdev_queue *dev_queue = &dev->tx_queue; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); q = dev_queue->qdisc_sleeping; } if (!q) @@ -763,14 +933,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } } else { qdisc_notify(skb, n, clid, NULL, q); } @@ -817,7 +981,8 @@ replay: q = dev->rx_queue.qdisc; } } else { - struct netdev_queue *dev_queue = &dev->tx_queue; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); q = dev_queue->qdisc_sleeping; } @@ -899,7 +1064,7 @@ create_n_graft: tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, &dev->tx_queue, + q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { @@ -910,22 +1075,18 @@ create_n_graft: graft: if (1) { - struct Qdisc *old_q = NULL; - err = qdisc_graft(dev, p, clid, q, &old_q); + spinlock_t *root_lock; + + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { if (q) { - qdisc_lock_tree(dev); + root_lock = qdisc_root_lock(q); + spin_lock_bh(root_lock); qdisc_destroy(q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } return err; } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - qdisc_lock_tree(dev); - qdisc_destroy(old_q); - qdisc_unlock_tree(dev); - } } return 0; } @@ -952,8 +1113,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &q->dev_queue->lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1003,13 +1167,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -1020,22 +1228,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; for_each_netdev(&init_net, dev) { struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - dev_queue = &dev->tx_queue; - list_for_each_entry(q, &dev_queue->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -1098,7 +1305,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - dev_queue = &dev->tx_queue; + dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1202,7 +1409,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &q->dev_queue->lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) @@ -1253,16 +1460,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; - int t; - int s_t; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1275,29 +1528,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = &dev->tx_queue; - list_for_each_entry(q, &dev_queue->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + +done: cb->args[0] = t; dev_put(dev);