X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fcore%2Fdev.c;h=ef1502d71f25cb4c5e30bd769ced1dec760212c3;hb=e308a5d806c852f56590ffdd3834d0df0cbed8d7;hp=58296307787746e99da8bdcfe006b9b39af32de1;hpb=43154d08d6bb5c69aa0d0e3448fb348b4cd84e91;p=linux-2.6 diff --git a/net/core/dev.c b/net/core/dev.c index 5829630778..ef1502d71f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include @@ -119,6 +120,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -256,7 +258,7 @@ DEFINE_PER_CPU(struct softnet_data, softnet_data); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* - * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = @@ -453,7 +455,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -478,7 +480,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -960,6 +962,12 @@ void netdev_state_change(struct net_device *dev) } } +void netdev_bonding_change(struct net_device *dev) +{ + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); +} +EXPORT_SYMBOL(netdev_bonding_change); + /** * dev_load - load a network module * @net: the applicable net namespace @@ -1116,6 +1124,29 @@ int dev_close(struct net_device *dev) } +/** + * dev_disable_lro - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + if (dev->ethtool_ops && dev->ethtool_ops->get_flags && + dev->ethtool_ops->set_flags) { + u32 flags = dev->ethtool_ops->get_flags(dev); + if (flags & ETH_FLAG_LRO) { + flags &= ~ETH_FLAG_LRO; + dev->ethtool_ops->set_flags(dev, flags); + } + } + WARN_ON(dev->features & NETIF_F_LRO); +} +EXPORT_SYMBOL(dev_disable_lro); + + static int dev_boot_phase = 1; /* @@ -1289,16 +1320,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +void __netif_schedule(struct netdev_queue *txq) { + struct net_device *dev = txq->dev; + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; struct softnet_data *sd; + unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; + txq->next_sched = sd->output_queue; + sd->output_queue = txq; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1362,6 +1395,29 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_IP_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_IPV6_CSUM) && + protocol == htons(ETH_P_IPV6))); +} + +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) +{ + if (can_checksum_protocol(dev->features, skb->protocol)) + return true; + + if (skb->protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + if (can_checksum_protocol(dev->features & dev->vlan_features, + veh->h_vlan_encapsulated_proto)) + return true; + } + + return false; +} /* * Invalidate hardware checksum when packet is to be mangled, and @@ -1613,6 +1669,7 @@ out_kfree_skb: int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1640,25 +1697,20 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - - if (!(dev->features & NETIF_F_GEN_CSUM) && - !((dev->features & NETIF_F_IP_CSUM) && - skb->protocol == htons(ETH_P_IP)) && - !((dev->features & NETIF_F_IPV6_CSUM) && - skb->protocol == htons(ETH_P_IPV6))) - if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) + goto out_kfree_skb; } gso: - spin_lock_prefetch(&dev->queue_lock); + txq = &dev->tx_queue; + spin_lock_prefetch(&txq->lock); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. + /* Updates of qdisc are serialized by queue->lock. * The struct Qdisc which is pointed to by qdisc is now a * rcu structure - it may be accessed without acquiring * a lock (but the structure may be stale.) The freeing of the @@ -1666,29 +1718,29 @@ gso: * more references to it. * * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock + * hold the queue->lock before calling it, since queue->lock * also serializes access to the device queue. */ - q = rcu_dereference(dev->qdisc); + q = rcu_dereference(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; + spin_lock(&txq->lock); + q = txq->qdisc; if (q->enqueue) { /* reset queue_mapping to zero */ skb_set_queue_mapping(skb, 0); rc = q->enqueue(skb, q); - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + qdisc_run(txq); + spin_unlock(&txq->lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } - spin_unlock(&dev->queue_lock); + spin_unlock(&txq->lock); } /* The device has no queue. Common case for software devices: @@ -1706,19 +1758,19 @@ gso: if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev->xmit_lock_owner != cpu) { + if (txq->xmit_lock_owner != cpu) { - HARD_TX_LOCK(dev, cpu); + HARD_TX_LOCK(dev, txq, cpu); if (!netif_queue_stopped(dev) && !netif_subqueue_stopped(dev, skb)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); goto out; } } - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); @@ -1862,7 +1914,7 @@ static void net_tx_action(struct softirq_action *h) } if (sd->output_queue) { - struct net_device *head; + struct netdev_queue *head; local_irq_disable(); head = sd->output_queue; @@ -1870,17 +1922,18 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct net_device *dev = head; + struct netdev_queue *txq = head; + struct net_device *dev = txq->dev; head = head->next_sched; smp_mb__before_clear_bit(); clear_bit(__LINK_STATE_SCHED, &dev->state); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + if (spin_trylock(&txq->lock)) { + qdisc_run(txq); + spin_unlock(&txq->lock); } else { - netif_schedule(dev); + netif_schedule_queue(txq); } } } @@ -1961,10 +2014,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -1976,10 +2030,12 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) + rxq = &dev->rx_queue; + + spin_lock(&rxq->lock); + if ((q = rxq->qdisc) != NULL) result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + spin_unlock(&rxq->lock); return result; } @@ -1988,7 +2044,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (!skb->dev->rx_queue.qdisc) goto out; if (*pt_prev) { @@ -2012,6 +2068,33 @@ out: } #endif +/* + * netif_nit_deliver - deliver received packets to network taps + * @skb: buffer + * + * This function is used to deliver incoming packets to network + * taps. It should be used when the normal netif_receive_skb path + * is bypassed, for example because of VLAN acceleration. + */ +void netif_nit_deliver(struct sk_buff *skb) +{ + struct packet_type *ptype; + + if (list_empty(&ptype_all)) + return; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->mac_len = skb->network_header - skb->mac_header; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) + deliver_skb(skb, ptype, skb->dev); + } + rcu_read_unlock(); +} + /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process @@ -2059,6 +2142,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -2747,16 +2834,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -static void __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - else - dev->flags |= IFF_PROMISC; + dev->flags |= IFF_PROMISC; + dev->promiscuity += inc; + if (dev->promiscuity == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch promisc and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_PROMISC; + else { + dev->promiscuity -= inc; + printk(KERN_WARNING "%s: promiscuity touches roof, " + "set promiscuity failed, promiscuity feature " + "of device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags != old_flags) { printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -2774,6 +2874,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_PROMISC); } + return 0; } /** @@ -2785,14 +2886,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_promiscuity(struct net_device *dev, int inc) +int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + int err; - __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc); + if (err < 0) + return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); + return err; } /** @@ -2805,22 +2911,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc) * to all interfaces. Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_allmulti(struct net_device *dev, int inc) +int dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; + dev->allmulti += inc; + if (dev->allmulti == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch allmulti and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_ALLMULTI; + else { + dev->allmulti -= inc; + printk(KERN_WARNING "%s: allmulti touches roof, " + "set allmulti failed, allmulti feature of " + "device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags ^ old_flags) { if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } + return 0; } /* @@ -2860,7 +2982,9 @@ void __dev_set_rx_mode(struct net_device *dev) void dev_set_rx_mode(struct net_device *dev) { netif_tx_lock_bh(dev); + netif_addr_lock(dev); __dev_set_rx_mode(dev); + netif_addr_unlock(dev); netif_tx_unlock_bh(dev); } @@ -2940,9 +3064,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); + netif_addr_lock(dev); err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock(dev); netif_tx_unlock_bh(dev); return err; } @@ -2951,7 +3077,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase @@ -2966,9 +3092,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); + netif_addr_lock(dev); err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock(dev); netif_tx_unlock_bh(dev); return err; } @@ -3037,10 +3165,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) int err = 0; netif_tx_lock_bh(to); + netif_addr_lock(to); err = __dev_addr_sync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); if (!err) __dev_set_rx_mode(to); + netif_addr_unlock(to); netif_tx_unlock_bh(to); return err; } @@ -3058,13 +3188,17 @@ EXPORT_SYMBOL(dev_unicast_sync); void dev_unicast_unsync(struct net_device *to, struct net_device *from) { netif_tx_lock_bh(from); + netif_addr_lock(from); netif_tx_lock_bh(to); + netif_addr_lock(to); __dev_addr_unsync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); __dev_set_rx_mode(to); + netif_addr_unlock(to); netif_tx_unlock_bh(to); + netif_addr_unlock(from); netif_tx_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -3086,6 +3220,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) static void dev_addr_discard(struct net_device *dev) { netif_tx_lock_bh(dev); + netif_addr_lock(dev); __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; @@ -3093,6 +3228,7 @@ static void dev_addr_discard(struct net_device *dev) __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; + netif_addr_unlock(dev); netif_tx_unlock_bh(dev); } @@ -3666,6 +3802,20 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } +static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue, + struct net_device *dev) +{ + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type); + dev_queue->xmit_lock_owner = -1; +} + +static void netdev_init_queue_locks(struct net_device *dev) +{ + __netdev_init_queue_locks_one(&dev->tx_queue, dev); + __netdev_init_queue_locks_one(&dev->rx_queue, dev); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3700,11 +3850,8 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->_xmit_lock); - netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); - dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); + spin_lock_init(&dev->addr_list_lock); + netdev_init_queue_locks(dev); dev->iflink = -1; @@ -3985,6 +4132,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue) +{ + spin_lock_init(&queue->lock); + queue->dev = dev; +} + +static void netdev_init_queues(struct net_device *dev) +{ + netdev_init_one_queue(dev, &dev->rx_queue); + netdev_init_one_queue(dev, &dev->tx_queue); +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -4037,6 +4197,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); @@ -4238,7 +4400,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct net_device **list_net; + struct netdev_queue **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd;