diff --git a/net/core/dev.c b/net/core/dev.c
index 69378f2506955acd69bf3d9e3b6b2b9fc584a778..63d6bcddbf46d1b09388191da411d038b77adee6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
 #include <linux/ctype.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
 
 #include "net-sysfs.h"
 
@@ -256,7 +261,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 
 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#ifdef CONFIG_LOCKDEP
 /*
  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
  * according to dev->type
@@ -296,6 +301,7 @@ static const char *netdev_lock_name[] =
         "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 
 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 {
@@ -308,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type)
        return ARRAY_SIZE(netdev_lock_type) - 1;
 }
 
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-                                           unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+                                                unsigned short dev_type)
 {
        int i;
 
@@ -317,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock,
        lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
                                   netdev_lock_name[i]);
 }
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+       int i;
+
+       i = netdev_lock_pos(dev->type);
+       lockdep_set_class_and_name(&dev->addr_list_lock,
+                                  &netdev_addr_lock_key[i],
+                                  netdev_lock_name[i]);
+}
 #else
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-                                           unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+                                                unsigned short dev_type)
+{
+}
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 {
 }
 #endif
@@ -1320,18 +1339,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-void __netif_schedule(struct netdev_queue *txq)
+void __netif_schedule(struct Qdisc *q)
 {
-       struct net_device *dev = txq->dev;
-
-       if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+       if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
                struct softnet_data *sd;
                unsigned long flags;
 
                local_irq_save(flags);
                sd = &__get_cpu_var(softnet_data);
-               txq->next_sched = sd->output_queue;
-               sd->output_queue = txq;
+               q->next_sched = sd->output_queue;
+               sd->output_queue = q;
                raise_softirq_irqoff(NET_TX_SOFTIRQ);
                local_irq_restore(flags);
        }
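
The hunk above changes what gets scheduled for transmit: a struct Qdisc, flagged with __QDISC_STATE_SCHED and pushed onto the per-CPU softnet_data output_queue list, rather than a netdev_queue flagged via the device state. Below is a minimal userspace sketch of that push-to-head / drain-later pattern; toy_qdisc, toy_schedule and toy_tx_action are made-up names and nothing here is a kernel API.

/* Toy model of the per-CPU output_queue used by __netif_schedule() and
 * net_tx_action(). Plain userspace C: the sched flag and the intrusive
 * next_sched list mirror the kernel pattern only. */
#include <stdbool.h>
#include <stdio.h>

struct toy_qdisc {
	const char *name;
	bool sched;			/* stands in for __QDISC_STATE_SCHED */
	struct toy_qdisc *next_sched;	/* intrusive singly linked list */
};

static struct toy_qdisc *output_queue;	/* one list per CPU in the kernel */

static void toy_schedule(struct toy_qdisc *q)
{
	if (!q->sched) {			/* test_and_set_bit() in the kernel */
		q->sched = true;
		q->next_sched = output_queue;	/* push to head, O(1) */
		output_queue = q;
		/* kernel: raise_softirq_irqoff(NET_TX_SOFTIRQ); */
	}
}

static void toy_tx_action(void)
{
	struct toy_qdisc *head = output_queue;	/* detach the whole list at once */

	output_queue = NULL;
	while (head) {
		struct toy_qdisc *q = head;

		head = head->next_sched;
		q->sched = false;		/* clear_bit(__QDISC_STATE_SCHED) */
		printf("running qdisc %s\n", q->name);
	}
}

int main(void)
{
	struct toy_qdisc a = { .name = "eth0-root" }, b = { .name = "eth1-root" };

	toy_schedule(&a);
	toy_schedule(&b);
	toy_schedule(&a);	/* already scheduled: no duplicate entry */
	toy_tx_action();	/* prints eth1-root, then eth0-root */
	return 0;
}
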
@@ -1598,7 +1615,8 @@ static int dev_gso_segment(struct sk_buff *skb)
        return 0;
 }
 
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+                       struct netdev_queue *txq)
 {
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
@@ -1627,9 +1645,7 @@ gso:
                        skb->next = nskb;
                        return rc;
                }
-               if (unlikely((netif_queue_stopped(dev) ||
-                            netif_subqueue_stopped(dev, skb)) &&
-                            skb->next))
+               if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);
 
@@ -1640,6 +1656,73 @@ out_kfree_skb:
        return 0;
 }
 
+static u32 simple_tx_hashrnd;
+static int simple_tx_hashrnd_initialized = 0;
+
+static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+       u32 addr1, addr2, ports;
+       u32 hash, ihl;
+       u8 ip_proto;
+
+       if (unlikely(!simple_tx_hashrnd_initialized)) {
+               get_random_bytes(&simple_tx_hashrnd, 4);
+               simple_tx_hashrnd_initialized = 1;
+       }
+
+       switch (skb->protocol) {
+       case __constant_htons(ETH_P_IP):
+               ip_proto = ip_hdr(skb)->protocol;
+               addr1 = ip_hdr(skb)->saddr;
+               addr2 = ip_hdr(skb)->daddr;
+               ihl = ip_hdr(skb)->ihl;
+               break;
+       case __constant_htons(ETH_P_IPV6):
+               ip_proto = ipv6_hdr(skb)->nexthdr;
+               addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+               addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
+               ihl = (40 >> 2);
+               break;
+       default:
+               return 0;
+       }
+
+
+       switch (ip_proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_DCCP:
+       case IPPROTO_ESP:
+       case IPPROTO_AH:
+       case IPPROTO_SCTP:
+       case IPPROTO_UDPLITE:
+               ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
+               break;
+
+       default:
+               ports = 0;
+               break;
+       }
+
+       hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+
+       return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
+
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+                                       struct sk_buff *skb)
+{
+       u16 queue_index = 0;
+
+       if (dev->select_queue)
+               queue_index = dev->select_queue(dev, skb);
+       else if (dev->real_num_tx_queues > 1)
+               queue_index = simple_tx_hash(dev, skb);
+
+       skb_set_queue_mapping(skb, queue_index);
+       return netdev_get_tx_queue(dev, queue_index);
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
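
simple_tx_hash() above feeds the addresses and the 32 bits following the IP header (the TCP/UDP port pair) into jhash_3words(), then maps the 32-bit hash onto [0, real_num_tx_queues) with a multiply-and-shift rather than a modulo. Below is a standalone sketch of only that mapping step; mix32() is a placeholder for jhash_3words(), and the addresses and seed are invented, so the concrete outputs differ from the kernel's.

/* Sketch of the hash -> queue-index mapping in simple_tx_hash():
 * queue = ((u64)hash * nr_queues) >> 32 always yields a value below
 * nr_queues and spreads flows evenly if the hash is roughly uniform. */
#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
	/* placeholder mixer; the kernel uses jhash_3words() */
	uint32_t h = a ^ (b * 2654435761u) ^ (c * 40503u) ^ seed;

	h ^= h >> 16;
	h *= 0x7feb352du;
	h ^= h >> 15;
	return h;
}

static uint16_t pick_queue(uint32_t hash, unsigned int nr_queues)
{
	return (uint16_t)(((uint64_t)hash * nr_queues) >> 32);
}

int main(void)
{
	unsigned int nr_queues = 8, counts[8] = { 0 };
	uint32_t seed = 0x12345678;	/* kernel: get_random_bytes() */

	for (uint32_t flow = 0; flow < 100000; flow++) {
		uint32_t h = mix32(0x0a000001, 0x0a000002, flow, seed);

		counts[pick_queue(h, nr_queues)]++;
	}
	for (unsigned int q = 0; q < nr_queues; q++)
		printf("queue %u: %u flows\n", q, counts[q]);
	return 0;
}
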
@@ -1665,13 +1748,6 @@ out_kfree_skb:
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-
-static struct netdev_queue *dev_pick_tx(struct net_device *dev,
-                                       struct sk_buff *skb)
-{
-       return netdev_get_tx_queue(dev, 0);
-}
-
 int dev_queue_xmit(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
@@ -1708,45 +1784,29 @@ int dev_queue_xmit(struct sk_buff *skb)
        }
 
 gso:
-       txq = dev_pick_tx(dev, skb);
-       spin_lock_prefetch(&txq->lock);
-
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
         */
        rcu_read_lock_bh();
 
-       /* Updates of qdisc are serialized by queue->lock.
-        * The struct Qdisc which is pointed to by qdisc is now a
-        * rcu structure - it may be accessed without acquiring
-        * a lock (but the structure may be stale.) The freeing of the
-        * qdisc will be deferred until it's known that there are no
-        * more references to it.
-        *
-        * If the qdisc has an enqueue function, we still need to
-        * hold the queue->lock before calling it, since queue->lock
-        * also serializes access to the device queue.
-        */
-
+       txq = dev_pick_tx(dev, skb);
        q = rcu_dereference(txq->qdisc);
+
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
        if (q->enqueue) {
-               /* Grab device queue */
-               spin_lock(&txq->lock);
-               q = txq->qdisc;
-               if (q->enqueue) {
-                       /* reset queue_mapping to zero */
-                       skb_set_queue_mapping(skb, 0);
-                       rc = q->enqueue(skb, q);
-                       qdisc_run(txq);
-                       spin_unlock(&txq->lock);
-
-                       rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-                       goto out;
-               }
-               spin_unlock(&txq->lock);
+               spinlock_t *root_lock = qdisc_root_lock(q);
+
+               spin_lock(root_lock);
+
+               rc = qdisc_enqueue_root(skb, q);
+               qdisc_run(q);
+
+               spin_unlock(root_lock);
+
+               rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+               goto out;
        }
 
        /* The device has no queue. Common case for software devices:
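
In the rewritten dev_queue_xmit() above, the qdisc path no longer takes the per-queue txq->lock: it takes the qdisc root lock once, enqueues with qdisc_enqueue_root(), runs qdisc_run() under that same lock, and folds NET_XMIT_BYPASS into NET_XMIT_SUCCESS before returning. The toy below models that lock-enqueue-run shape in plain userspace C (a pthread mutex instead of a spinlock, a fixed ring instead of a real qdisc); it illustrates the pattern, not the kernel's actual locking rules.

/* Toy model of the qdisc path in dev_queue_xmit(). None of these are
 * kernel APIs. */
#include <pthread.h>
#include <stdio.h>

#define TOY_QLEN 8

struct toy_qdisc {
	pthread_mutex_t root_lock;	/* stands in for qdisc_root_lock(q) */
	int ring[TOY_QLEN];
	int head, tail, len;
};

static int toy_enqueue(struct toy_qdisc *q, int pkt)
{
	if (q->len == TOY_QLEN)
		return -1;			/* roughly NET_XMIT_DROP */
	q->ring[q->tail] = pkt;
	q->tail = (q->tail + 1) % TOY_QLEN;
	q->len++;
	return 0;				/* NET_XMIT_SUCCESS */
}

static void toy_qdisc_run(struct toy_qdisc *q)
{
	while (q->len) {			/* the kernel also honours a quota */
		int pkt = q->ring[q->head];

		q->head = (q->head + 1) % TOY_QLEN;
		q->len--;
		printf("xmit packet %d\n", pkt);
	}
}

static int toy_queue_xmit(struct toy_qdisc *q, int pkt)
{
	int rc;

	pthread_mutex_lock(&q->root_lock);	/* spin_lock(root_lock) */
	rc = toy_enqueue(q, pkt);
	toy_qdisc_run(q);
	pthread_mutex_unlock(&q->root_lock);
	return rc;
}

int main(void)
{
	struct toy_qdisc q = { .root_lock = PTHREAD_MUTEX_INITIALIZER };

	toy_queue_xmit(&q, 1);
	toy_queue_xmit(&q, 2);
	return 0;
}
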
@@ -1768,10 +1828,9 @@ gso:
 
                        HARD_TX_LOCK(dev, txq, cpu);
 
-                       if (!netif_queue_stopped(dev) &&
-                           !netif_subqueue_stopped(dev, skb)) {
+                       if (!netif_tx_queue_stopped(txq)) {
                                rc = 0;
-                               if (!dev_hard_start_xmit(skb, dev)) {
+                               if (!dev_hard_start_xmit(skb, dev, txq)) {
                                        HARD_TX_UNLOCK(dev, txq);
                                        goto out;
                                }
@@ -1914,13 +1973,13 @@ static void net_tx_action(struct softirq_action *h)
                        struct sk_buff *skb = clist;
                        clist = clist->next;
 
-                       BUG_TRAP(!atomic_read(&skb->users));
+                       WARN_ON(atomic_read(&skb->users));
                        __kfree_skb(skb);
                }
        }
 
        if (sd->output_queue) {
-               struct netdev_queue *head;
+               struct Qdisc *head;
 
                local_irq_disable();
                head = sd->output_queue;
@@ -1928,18 +1987,20 @@ static void net_tx_action(struct softirq_action *h)
                local_irq_enable();
 
                while (head) {
-                       struct netdev_queue *txq = head;
-                       struct net_device *dev = txq->dev;
+                       struct Qdisc *q = head;
+                       spinlock_t *root_lock;
+
                        head = head->next_sched;
 
                        smp_mb__before_clear_bit();
-                       clear_bit(__LINK_STATE_SCHED, &dev->state);
+                       clear_bit(__QDISC_STATE_SCHED, &q->state);
 
-                       if (spin_trylock(&txq->lock)) {
-                               qdisc_run(txq);
-                               spin_unlock(&txq->lock);
+                       root_lock = qdisc_root_lock(q);
+                       if (spin_trylock(root_lock)) {
+                               qdisc_run(q);
+                               spin_unlock(root_lock);
                        } else {
-                               netif_schedule_queue(txq);
+                               __netif_schedule(q);
                        }
                }
        }
@@ -2038,10 +2099,12 @@ static int ing_filter(struct sk_buff *skb)
 
        rxq = &dev->rx_queue;
 
-       spin_lock(&rxq->lock);
-       if ((q = rxq->qdisc) != NULL)
-               result = q->enqueue(skb, q);
-       spin_unlock(&rxq->lock);
+       q = rxq->qdisc;
+       if (q != &noop_qdisc) {
+               spin_lock(qdisc_lock(q));
+               result = qdisc_enqueue_root(skb, q);
+               spin_unlock(qdisc_lock(q));
+       }
 
        return result;
 }
@@ -2050,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       if (!skb->dev->rx_queue.qdisc)
+       if (skb->dev->rx_queue.qdisc == &noop_qdisc)
                goto out;
 
        if (*pt_prev) {
@@ -2332,7 +2395,7 @@ out:
         */
        if (!cpus_empty(net_dma.channel_mask)) {
                int chan_idx;
-               for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
+               for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
                        struct dma_chan *chan = net_dma.channels[chan_idx];
                        if (chan)
                                dma_async_memcpy_issue_pending(chan);
@@ -3784,7 +3847,7 @@ static void rollback_registered(struct net_device *dev)
                dev->uninit(dev);
 
        /* Notifier chain MUST detach us from master device. */
-       BUG_TRAP(!dev->master);
+       WARN_ON(dev->master);
 
        /* Remove entries from kobject tree */
        netdev_unregister_kobject(dev);
@@ -3799,7 +3862,7 @@ static void __netdev_init_queue_locks_one(struct net_device *dev,
                                          void *_unused)
 {
        spin_lock_init(&dev_queue->_xmit_lock);
-       netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+       netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
        dev_queue->xmit_lock_owner = -1;
 }
 
@@ -3844,6 +3907,7 @@ int register_netdevice(struct net_device *dev)
        net = dev_net(dev);
 
        spin_lock_init(&dev->addr_list_lock);
+       netdev_set_addr_lockdep_class(dev);
        netdev_init_queue_locks(dev);
 
        dev->iflink = -1;
@@ -4105,9 +4169,9 @@ void netdev_run_todo(void)
 
                /* paranoia */
                BUG_ON(atomic_read(&dev->refcnt));
-               BUG_TRAP(!dev->ip_ptr);
-               BUG_TRAP(!dev->ip6_ptr);
-               BUG_TRAP(!dev->dn_ptr);
+               WARN_ON(dev->ip_ptr);
+               WARN_ON(dev->ip6_ptr);
+               WARN_ON(dev->dn_ptr);
 
                if (dev->destructor)
                        dev->destructor(dev);
@@ -4129,7 +4193,6 @@ static void netdev_init_one_queue(struct net_device *dev,
                                  struct netdev_queue *queue,
                                  void *_unused)
 {
-       spin_lock_init(&queue->lock);
        queue->dev = dev;
 }
 
@@ -4155,13 +4218,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 {
        struct netdev_queue *tx;
        struct net_device *dev;
-       int alloc_size;
+       size_t alloc_size;
        void *p;
 
        BUG_ON(strlen(name) >= sizeof(dev->name));
 
-       alloc_size = sizeof(struct net_device) +
-                    sizeof(struct net_device_subqueue) * (queue_count - 1);
+       alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
                /* ensure 32-byte alignment of private area */
                alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4176,7 +4238,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                return NULL;
        }
 
-       tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+       tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
        if (!tx) {
                printk(KERN_ERR "alloc_netdev: Unable to allocate "
                       "tx qdiscs.\n");
@@ -4191,16 +4253,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        dev->_tx = tx;
        dev->num_tx_queues = queue_count;
+       dev->real_num_tx_queues = queue_count;
 
        if (sizeof_priv) {
                dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) +
-                              (sizeof(struct net_device_subqueue) *
-                               (queue_count - 1)) + NETDEV_ALIGN_CONST)
+                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
                              & ~NETDEV_ALIGN_CONST));
        }
 
-       dev->egress_subqueue_count = queue_count;
        dev->gso_max_size = GSO_MAX_SIZE;
 
        netdev_init_queues(dev);
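
The alloc_netdev_mq() hunks above drop the old tail array of struct net_device_subqueue, allocate the TX queue array separately with kcalloc(), and initialise real_num_tx_queues alongside num_tx_queues; the driver-private area still starts at the first 32-byte boundary after struct net_device. A small sketch of that rounding follows, with a made-up struct size (the real sizeof(struct net_device) obviously differs); only the align-up expression is taken from the diff.

/* Sketch of the 32-byte rounding used for the dev->priv offset. */
#include <stdio.h>
#include <stddef.h>

#define NETDEV_ALIGN		32	/* from "ensure 32-byte alignment" above */
#define NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)

static size_t netdev_align_up(size_t n)
{
	return (n + NETDEV_ALIGN_CONST) & ~(size_t)NETDEV_ALIGN_CONST;
}

int main(void)
{
	size_t netdev_size = 2200;	/* stand-in for sizeof(struct net_device) */

	/* dev->priv starts at the first 32-byte boundary after the struct */
	printf("priv offset: %zu\n", netdev_align_up(netdev_size));
	printf("2200 -> %zu, 2208 -> %zu\n",
	       netdev_align_up(2200), netdev_align_up(2208));
	return 0;
}
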
@@ -4408,7 +4468,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                            void *ocpu)
 {
        struct sk_buff **list_skb;
-       struct netdev_queue **list_net;
+       struct Qdisc **list_net;
        struct sk_buff *skb;
        unsigned int cpu, oldcpu = (unsigned long)ocpu;
        struct softnet_data *sd, *oldsd;
@@ -4470,7 +4530,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
        i = 0;
        cpu = first_cpu(cpu_online_map);
 
-       for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
+       for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
                chan = net_dma->channels[chan_idx];
 
                n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
@@ -4637,6 +4697,26 @@ err_name:
        return -ENOMEM;
 }
 
+char *netdev_drivername(struct net_device *dev, char *buffer, int len)
+{
+       struct device_driver *driver;
+       struct device *parent;
+
+       if (len <= 0 || !buffer)
+               return buffer;
+       buffer[0] = 0;
+
+       parent = dev->dev.parent;
+
+       if (!parent)
+               return buffer;
+
+       driver = parent->driver;
+       if (driver && driver->name)
+               strlcpy(buffer, driver->name, len);
+       return buffer;
+}
+
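
netdev_drivername() above always hands the caller's buffer back, cleared to an empty string when the device has no parent or the parent's driver has no name, so callers can print the result unconditionally. Below is a userspace sketch of those semantics using stub types; toy_netdev and friends are not kernel structures, and strlcpy() is approximated with strncpy() plus explicit termination.

/* Userspace sketch of the defensive-copy behaviour of netdev_drivername(). */
#include <stdio.h>
#include <string.h>

struct toy_driver { const char *name; };
struct toy_parent { struct toy_driver *driver; };
struct toy_netdev { struct toy_parent *parent; };

static char *toy_drivername(struct toy_netdev *dev, char *buffer, int len)
{
	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;
	if (!dev->parent || !dev->parent->driver || !dev->parent->driver->name)
		return buffer;
	strncpy(buffer, dev->parent->driver->name, len - 1);
	buffer[len - 1] = 0;	/* strlcpy() semantics: always NUL-terminate */
	return buffer;
}

int main(void)
{
	char buf[16];
	struct toy_driver drv = { .name = "e1000e" };
	struct toy_parent parent = { .driver = &drv };
	struct toy_netdev eth0 = { .parent = &parent };
	struct toy_netdev lo = { .parent = NULL };	/* no parent device */

	printf("eth0 -> \"%s\"\n", toy_drivername(&eth0, buf, sizeof(buf)));
	printf("lo   -> \"%s\"\n", toy_drivername(&lo, buf, sizeof(buf)));
	return 0;
}
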
 static void __net_exit netdev_exit(struct net *net)
 {
        kfree(net->dev_name_head);
@@ -4733,8 +4813,8 @@ static int __init net_dev_init(void)
 
        dev_boot_phase = 0;
 
-       open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
-       open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+       open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+       open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
        hotcpu_notifier(dev_cpu_callback, 0);
        dst_init();