]> err.no Git - linux-2.6/blob - net/sched/sch_teql.c
8ac05981be202ab5090c2c466e8855bcf7ff83e4
[linux-2.6] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/if_arp.h>
17 #include <linux/netdevice.h>
18 #include <linux/init.h>
19 #include <linux/skbuff.h>
20 #include <linux/moduleparam.h>
21 #include <net/dst.h>
22 #include <net/neighbour.h>
23 #include <net/pkt_sched.h>
24
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
54
55 struct teql_master
56 {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         struct net_device_stats stats;
62 };
63
64 struct teql_sched_data
65 {
66         struct Qdisc *next;
67         struct teql_master *m;
68         struct neighbour *ncache;
69         struct sk_buff_head q;
70 };
71
/* Successor of slave qdisc q in its master's circular ring (usable as an lvalue). */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device mirrors from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75
76 /* "teql*" qdisc routines */
77
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81         struct net_device *dev = qdisc_dev(sch);
82         struct teql_sched_data *q = qdisc_priv(sch);
83
84         if (q->q.qlen < dev->tx_queue_len) {
85                 __skb_queue_tail(&q->q, skb);
86                 sch->bstats.bytes += skb->len;
87                 sch->bstats.packets++;
88                 return 0;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static int
97 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98 {
99         struct teql_sched_data *q = qdisc_priv(sch);
100
101         __skb_queue_head(&q->q, skb);
102         sch->qstats.requeues++;
103         return 0;
104 }
105
106 static struct sk_buff *
107 teql_dequeue(struct Qdisc* sch)
108 {
109         struct teql_sched_data *dat = qdisc_priv(sch);
110         struct netdev_queue *dat_queue;
111         struct sk_buff *skb;
112
113         skb = __skb_dequeue(&dat->q);
114         dat_queue = &dat->m->dev->tx_queue;
115         if (skb == NULL) {
116                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
117                 if (m) {
118                         dat->m->slaves = sch;
119                         netif_wake_queue(m);
120                 }
121         }
122         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
123         return skb;
124 }
125
126 static __inline__ void
127 teql_neigh_release(struct neighbour *n)
128 {
129         if (n)
130                 neigh_release(n);
131 }
132
133 static void
134 teql_reset(struct Qdisc* sch)
135 {
136         struct teql_sched_data *dat = qdisc_priv(sch);
137
138         skb_queue_purge(&dat->q);
139         sch->q.qlen = 0;
140         teql_neigh_release(xchg(&dat->ncache, NULL));
141 }
142
143 static void
144 teql_destroy(struct Qdisc* sch)
145 {
146         struct Qdisc *q, *prev;
147         struct teql_sched_data *dat = qdisc_priv(sch);
148         struct teql_master *master = dat->m;
149
150         if ((prev = master->slaves) != NULL) {
151                 do {
152                         q = NEXT_SLAVE(prev);
153                         if (q == sch) {
154                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
155                                 if (q == master->slaves) {
156                                         master->slaves = NEXT_SLAVE(q);
157                                         if (q == master->slaves) {
158                                                 master->slaves = NULL;
159                                                 spin_lock_bh(&master->dev->tx_queue.lock);
160                                                 qdisc_reset(master->dev->tx_queue.qdisc);
161                                                 spin_unlock_bh(&master->dev->tx_queue.lock);
162                                         }
163                                 }
164                                 skb_queue_purge(&dat->q);
165                                 teql_neigh_release(xchg(&dat->ncache, NULL));
166                                 break;
167                         }
168
169                 } while ((prev = q) != master->slaves);
170         }
171 }
172
173 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
174 {
175         struct net_device *dev = qdisc_dev(sch);
176         struct teql_master *m = (struct teql_master*)sch->ops;
177         struct teql_sched_data *q = qdisc_priv(sch);
178
179         if (dev->hard_header_len > m->dev->hard_header_len)
180                 return -EINVAL;
181
182         if (m->dev == dev)
183                 return -ELOOP;
184
185         q->m = m;
186
187         skb_queue_head_init(&q->q);
188
189         if (m->slaves) {
190                 if (m->dev->flags & IFF_UP) {
191                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
192                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
193                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
194                             || dev->mtu < m->dev->mtu)
195                                 return -EINVAL;
196                 } else {
197                         if (!(dev->flags&IFF_POINTOPOINT))
198                                 m->dev->flags &= ~IFF_POINTOPOINT;
199                         if (!(dev->flags&IFF_BROADCAST))
200                                 m->dev->flags &= ~IFF_BROADCAST;
201                         if (!(dev->flags&IFF_MULTICAST))
202                                 m->dev->flags &= ~IFF_MULTICAST;
203                         if (dev->mtu < m->dev->mtu)
204                                 m->dev->mtu = dev->mtu;
205                 }
206                 q->next = NEXT_SLAVE(m->slaves);
207                 NEXT_SLAVE(m->slaves) = sch;
208         } else {
209                 q->next = sch;
210                 m->slaves = sch;
211                 m->dev->mtu = dev->mtu;
212                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
213         }
214         return 0;
215 }
216
217
218 static int
219 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
220 {
221         struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc);
222         struct neighbour *mn = skb->dst->neighbour;
223         struct neighbour *n = q->ncache;
224
225         if (mn->tbl == NULL)
226                 return -EINVAL;
227         if (n && n->tbl == mn->tbl &&
228             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
229                 atomic_inc(&n->refcnt);
230         } else {
231                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
232                 if (IS_ERR(n))
233                         return PTR_ERR(n);
234         }
235         if (neigh_event_send(n, skb_res) == 0) {
236                 int err;
237
238                 read_lock(&n->lock);
239                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
240                                       n->ha, NULL, skb->len);
241                 read_unlock(&n->lock);
242
243                 if (err < 0) {
244                         neigh_release(n);
245                         return -EINVAL;
246                 }
247                 teql_neigh_release(xchg(&q->ncache, n));
248                 return 0;
249         }
250         neigh_release(n);
251         return (skb_res == NULL) ? -EAGAIN : 1;
252 }
253
254 static inline int teql_resolve(struct sk_buff *skb,
255                                struct sk_buff *skb_res, struct net_device *dev)
256 {
257         if (dev->tx_queue.qdisc == &noop_qdisc)
258                 return -ENODEV;
259
260         if (dev->header_ops == NULL ||
261             skb->dst == NULL ||
262             skb->dst->neighbour == NULL)
263                 return 0;
264         return __teql_resolve(skb, skb_res, dev);
265 }
266
267 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
268 {
269         struct teql_master *master = netdev_priv(dev);
270         struct Qdisc *start, *q;
271         int busy;
272         int nores;
273         int len = skb->len;
274         int subq = skb_get_queue_mapping(skb);
275         struct sk_buff *skb_res = NULL;
276
277         start = master->slaves;
278
279 restart:
280         nores = 0;
281         busy = 0;
282
283         if ((q = start) == NULL)
284                 goto drop;
285
286         do {
287                 struct net_device *slave = qdisc_dev(q);
288
289                 if (slave->tx_queue.qdisc_sleeping != q)
290                         continue;
291                 if (netif_queue_stopped(slave) ||
292                     __netif_subqueue_stopped(slave, subq) ||
293                     !netif_running(slave)) {
294                         busy = 1;
295                         continue;
296                 }
297
298                 switch (teql_resolve(skb, skb_res, slave)) {
299                 case 0:
300                         if (netif_tx_trylock(slave)) {
301                                 if (!netif_queue_stopped(slave) &&
302                                     !__netif_subqueue_stopped(slave, subq) &&
303                                     slave->hard_start_xmit(skb, slave) == 0) {
304                                         netif_tx_unlock(slave);
305                                         master->slaves = NEXT_SLAVE(q);
306                                         netif_wake_queue(dev);
307                                         master->stats.tx_packets++;
308                                         master->stats.tx_bytes += len;
309                                         return 0;
310                                 }
311                                 netif_tx_unlock(slave);
312                         }
313                         if (netif_queue_stopped(dev))
314                                 busy = 1;
315                         break;
316                 case 1:
317                         master->slaves = NEXT_SLAVE(q);
318                         return 0;
319                 default:
320                         nores = 1;
321                         break;
322                 }
323                 __skb_pull(skb, skb_network_offset(skb));
324         } while ((q = NEXT_SLAVE(q)) != start);
325
326         if (nores && skb_res == NULL) {
327                 skb_res = skb;
328                 goto restart;
329         }
330
331         if (busy) {
332                 netif_stop_queue(dev);
333                 return 1;
334         }
335         master->stats.tx_errors++;
336
337 drop:
338         master->stats.tx_dropped++;
339         dev_kfree_skb(skb);
340         return 0;
341 }
342
343 static int teql_master_open(struct net_device *dev)
344 {
345         struct Qdisc * q;
346         struct teql_master *m = netdev_priv(dev);
347         int mtu = 0xFFFE;
348         unsigned flags = IFF_NOARP|IFF_MULTICAST;
349
350         if (m->slaves == NULL)
351                 return -EUNATCH;
352
353         flags = FMASK;
354
355         q = m->slaves;
356         do {
357                 struct net_device *slave = qdisc_dev(q);
358
359                 if (slave == NULL)
360                         return -EUNATCH;
361
362                 if (slave->mtu < mtu)
363                         mtu = slave->mtu;
364                 if (slave->hard_header_len > LL_MAX_HEADER)
365                         return -EINVAL;
366
367                 /* If all the slaves are BROADCAST, master is BROADCAST
368                    If all the slaves are PtP, master is PtP
369                    Otherwise, master is NBMA.
370                  */
371                 if (!(slave->flags&IFF_POINTOPOINT))
372                         flags &= ~IFF_POINTOPOINT;
373                 if (!(slave->flags&IFF_BROADCAST))
374                         flags &= ~IFF_BROADCAST;
375                 if (!(slave->flags&IFF_MULTICAST))
376                         flags &= ~IFF_MULTICAST;
377         } while ((q = NEXT_SLAVE(q)) != m->slaves);
378
379         m->dev->mtu = mtu;
380         m->dev->flags = (m->dev->flags&~FMASK) | flags;
381         netif_start_queue(m->dev);
382         return 0;
383 }
384
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
390
391 static struct net_device_stats *teql_master_stats(struct net_device *dev)
392 {
393         struct teql_master *m = netdev_priv(dev);
394         return &m->stats;
395 }
396
397 static int teql_master_mtu(struct net_device *dev, int new_mtu)
398 {
399         struct teql_master *m = netdev_priv(dev);
400         struct Qdisc *q;
401
402         if (new_mtu < 68)
403                 return -EINVAL;
404
405         q = m->slaves;
406         if (q) {
407                 do {
408                         if (new_mtu > qdisc_dev(q)->mtu)
409                                 return -EINVAL;
410                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
411         }
412
413         dev->mtu = new_mtu;
414         return 0;
415 }
416
417 static __init void teql_master_setup(struct net_device *dev)
418 {
419         struct teql_master *master = netdev_priv(dev);
420         struct Qdisc_ops *ops = &master->qops;
421
422         master->dev     = dev;
423         ops->priv_size  = sizeof(struct teql_sched_data);
424
425         ops->enqueue    =       teql_enqueue;
426         ops->dequeue    =       teql_dequeue;
427         ops->requeue    =       teql_requeue;
428         ops->init       =       teql_qdisc_init;
429         ops->reset      =       teql_reset;
430         ops->destroy    =       teql_destroy;
431         ops->owner      =       THIS_MODULE;
432
433         dev->open               = teql_master_open;
434         dev->hard_start_xmit    = teql_master_xmit;
435         dev->stop               = teql_master_close;
436         dev->get_stats          = teql_master_stats;
437         dev->change_mtu         = teql_master_mtu;
438         dev->type               = ARPHRD_VOID;
439         dev->mtu                = 1500;
440         dev->tx_queue_len       = 100;
441         dev->flags              = IFF_NOARP;
442         dev->hard_header_len    = LL_MAX_HEADER;
443 }
444
445 static LIST_HEAD(master_dev_list);
446 static int max_equalizers = 1;
447 module_param(max_equalizers, int, 0);
448 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
449
450 static int __init teql_init(void)
451 {
452         int i;
453         int err = -ENODEV;
454
455         for (i = 0; i < max_equalizers; i++) {
456                 struct net_device *dev;
457                 struct teql_master *master;
458
459                 dev = alloc_netdev(sizeof(struct teql_master),
460                                   "teql%d", teql_master_setup);
461                 if (!dev) {
462                         err = -ENOMEM;
463                         break;
464                 }
465
466                 if ((err = register_netdev(dev))) {
467                         free_netdev(dev);
468                         break;
469                 }
470
471                 master = netdev_priv(dev);
472
473                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
474                 err = register_qdisc(&master->qops);
475
476                 if (err) {
477                         unregister_netdev(dev);
478                         free_netdev(dev);
479                         break;
480                 }
481
482                 list_add_tail(&master->master_list, &master_dev_list);
483         }
484         return i ? 0 : err;
485 }
486
487 static void __exit teql_exit(void)
488 {
489         struct teql_master *master, *nxt;
490
491         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
492
493                 list_del(&master->master_list);
494
495                 unregister_qdisc(&master->qops);
496                 unregister_netdev(master->dev);
497                 free_netdev(master->dev);
498         }
499 }
500
501 module_init(teql_init);
502 module_exit(teql_exit);
503
504 MODULE_LICENSE("GPL");