/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/pkt_sched.h>
/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		     Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	----------------------------------------------------------------

	This started out as a simple way to delay outgoing packets to
	test TCP but has grown to include most of the functionality of a
	full blown network emulator like NISTnet.  It can delay packets
	and add random jitter (and correlation).  The random distribution
	can also be loaded from a table to provide normal, Pareto, or
	experimental curves.  Packet loss, duplication, and reordering
	can also be emulated.

	This qdisc does not do classification; that can be handled by
	layering other disciplines on top of it.  It does not need to do
	bandwidth control either, since that can be handled by a token
	bucket or other rate control qdisc.

	The emulator is limited by the Linux timer resolution and will
	create packet bursts on the HZ boundary (1ms at HZ=1000).
*/
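/*
 * Overview of the data path (a sketch of what follows): a packet chosen
 * for delay is stamped with a time_to_send value and held on the private
 * "delayed" list; netem_run() moves packets whose time has come to the
 * inner qdisc (a pfifo by default) and arms the watchdog timer when the
 * head packet is not yet due.
 */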
struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct sk_buff_head delayed;
	struct timer_list timer;

	u32 latency;
	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 jitter;
	u32 duplicate;

	struct crndstate {
		unsigned long last;
		unsigned long rho;
	} delay_cor, loss_cor, dup_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};
/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}
/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static unsigned long get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
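/*
 * Illustrative example (values assumed, not from the source): with
 * rho = 0xC0000000, i.e. about 0.75 in 32-bit fixed point, the update
 * above yields roughly
 *	answer ~= 0.25 * value + 0.75 * last
 * so successive samples are strongly correlated, while rho == 0
 * degenerates to plain uniform random values.
 */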
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static long tabledist(unsigned long mu, long sigma,
		      struct crndstate *state, const struct disttable *dist)
{
	long t, x;
	unsigned long rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
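/*
 * Worked example of the fixed-point math above (values assumed): taking
 * NETEM_DIST_SCALE as 8192 with mu = 100000, sigma = 10000 and a table
 * entry t = -4096 (half a deviation below the mean), the result is
 *	100000 + (-4096 * 10000) / 8192 = 95000
 * The split into (sigma % SCALE) and (sigma / SCALE) terms keeps the
 * intermediate products small enough not to overflow a long.
 */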
/* Put skb in the private delayed queue. */
static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	psched_tdiff_t td;
	psched_time_t now;

	PSCHED_GET_TIME(now);
	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);

	/* Always queue at tail to keep packets in order */
	if (likely(q->delayed.qlen < q->limit)) {
		struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;

		PSCHED_TADD2(now, td, cb->time_to_send);

		pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
			 now, cb->time_to_send);

		__skb_queue_tail(&q->delayed, skb);
		return NET_XMIT_SUCCESS;
	}

	pr_debug("netem_delay: queue over limit %d\n", q->limit);
	sch->qstats.overlimits++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
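/*
 * Illustrative example (values assumed): with latency = 100ms, jitter =
 * 10ms and no distribution table loaded, tabledist() returns a uniform
 * value in [90ms, 110ms), so each packet's time_to_send is stamped that
 * far past "now".
 */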
/*
 * Move a packet that is ready to send from the delay holding
 * list to the underlying qdisc.
 */
static int netem_run(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	psched_time_t now;

	PSCHED_GET_TIME(now);

	skb = skb_peek(&q->delayed);
	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		long delay
			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);

		/* more time remaining? */
		if (delay > 0) {
			mod_timer(&q->timer, jiffies + delay);
			return 1;
		}

		__skb_unlink(skb, &q->delayed);

		if (q->qdisc->enqueue(skb, q->qdisc)) {
			sch->q.qlen--;
			sch->qstats.drops++;
		}
	}

	return 0;
}
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_DROP;
	}

	/*
	 * If we need to duplicate the packet, re-insert it at the top of
	 * the qdisc tree, since the parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = sch->dev->qdisc;
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq);
		q->duplicate = dupsave;
	}

	/* If doing simple delay then gap == 0 so all packets
	 * go into the delayed holding queue;
	 * otherwise, if doing reordering, only "1 out of gap"
	 * packets will be delayed.
	 */
	if (q->counter < q->gap) {
		++q->counter;
		ret = q->qdisc->enqueue(skb, q->qdisc);
	} else {
		q->counter = 0;
		ret = netem_delay(sch, skb);
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
	} else
		sch->qstats.drops++;

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}
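/*
 * Illustrative example of the reordering logic above (values assumed):
 * with gap = 5, five packets in a row go straight to the inner qdisc and
 * the next one is placed on the delay list instead, so it re-emerges
 * later and out of order; with gap = 0 every packet takes the delay path.
 */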
/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}
static unsigned int netem_drop(struct Qdisc* sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	int pending;

	pending = netem_run(sch);

	skb = q->qdisc->dequeue(q->qdisc);
	if (skb) {
		pr_debug("netem_dequeue: return skb=%p\n", skb);
		sch->q.qlen--;
		sch->flags &= ~TCQ_F_THROTTLED;
	} else if (pending) {
		pr_debug("netem_dequeue: throttling\n");
		sch->flags |= TCQ_F_THROTTLED;
	}

	return skb;
}
static void netem_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	skb_queue_purge(&q->delayed);

	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer_sync(&q->timer);
}
static int set_fifo_limit(struct Qdisc *q, int limit)
{
	struct rtattr *rta;
	int ret = -ENOMEM;

	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (rta) {
		rta->rta_type = RTM_NEWQDISC;
		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

		ret = q->ops->change(q, rta);
		kfree(rta);
	}
	return ret;
}
/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
	const __s16 *data = RTA_DATA(attr);
	struct disttable *d;
	int i;

	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	spin_lock_bh(&sch->dev->queue_lock);
	d = xchg(&q->delay_dist, d);
	spin_unlock_bh(&sch->dev->queue_lock);

	kfree(d);
	return 0;
}
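/*
 * Note on the table contents (an illustrative interpretation of
 * tabledist() above): each signed 16-bit entry is a deviation from the
 * mean in units of sigma/NETEM_DIST_SCALE, so a table sampled from the
 * inverse CDF of, say, a normal or Pareto curve makes the delays follow
 * that distribution.
 */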
static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*c))
		return -EINVAL;

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
	return 0;
}
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;

	qopt = RTA_DATA(opt);
	ret = set_fifo_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* Handle nested options after the initial queue options.
	 * Should have put all options in nested format, but too late now.
	 */
	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
		struct rtattr *tb[TCA_NETEM_MAX];

		if (rtattr_parse(tb, TCA_NETEM_MAX,
				 RTA_DATA(opt) + sizeof(*qopt),
				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
			return -EINVAL;

		if (tb[TCA_NETEM_CORR-1]) {
			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_DELAY_DIST-1]) {
			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
			if (ret)
				return ret;
		}
	}

	return 0;
}
static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	skb_queue_head_init(&q->delayed);
	init_timer(&q->timer);
	q->timer.function = netem_watchdog;
	q->timer.data = (unsigned long) sch;
	q->counter = 0;

	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta = (struct rtattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	rta->rta_len = skb->tail - b;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			      struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}
static struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.change		=	netem_change_class,
	.delete		=	netem_delete,
	.walk		=	netem_walk,
	.tcf_chain	=	netem_find_tcf,
	.dump		=	netem_dump_class,
};
static struct Qdisc_ops netem_qdisc_ops = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
static int __init netem_module_init(void)
{
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");