err.no Git - linux-2.6/blob - net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/net_namespace.h>
  96 #include <net/sock.h>
  97 #include <linux/rtnetlink.h>
  98 #include <linux/proc_fs.h>
  99 #include <linux/seq_file.h>
 100 #include <linux/stat.h>
 101 #include <linux/if_bridge.h>
 102 #include <linux/if_macvlan.h>
 103 #include <net/dst.h>
 104 #include <net/pkt_sched.h>
 105 #include <net/checksum.h>
 106 #include <linux/highmem.h>
 107 #include <linux/init.h>
 108 #include <linux/kmod.h>
 109 #include <linux/module.h>
 110 #include <linux/kallsyms.h>
 111 #include <linux/netpoll.h>
 112 #include <linux/rcupdate.h>
 113 #include <linux/delay.h>
 114 #include <net/wext.h>
 115 #include <net/iw_handler.h>
 116 #include <asm/current.h>
 117 #include <linux/audit.h>
 118 #include <linux/dmaengine.h>
 119 #include <linux/err.h>
 120 #include <linux/ctype.h>
 121 #include <linux/if_arp.h>
 122
 123 /*
 124  *      The list of packet types we will receive (as opposed to discard)
 125  *      and the routines to invoke.
 126  *
 127  *      Why 16. Because with 16 the only overlap we get on a hash of the
 128  *      low nibble of the protocol value is RARP/SNAP/X.25.
 129  *
 130  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 131  *             sure which should go first, but I bet it won't make much
 132  *             difference if we are running VLANs.  The good news is that
 133  *             this protocol won't be in the list unless compiled in, so
 134  *             the average user (w/out VLANs) will not be adversely affected.
 135  *             --BLG
 136  *
 137  *              0800    IP
 138  *              8100    802.1Q VLAN
 139  *              0001    802.3
 140  *              0002    AX.25
 141  *              0004    802.2
 142  *              8035    RARP
 143  *              0005    SNAP
 144  *              0805    X.25
 145  *              0806    ARP
 146  *              8137    IPX
 147  *              0009    Localtalk
 148  *              86DD    IPv6
 149  */
 150
 151 static DEFINE_SPINLOCK(ptype_lock);
 152 static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list */
 153 static struct list_head ptype_all __read_mostly;        /* Taps */
 154
 155 #ifdef CONFIG_NET_DMA
 156 struct net_dma {
 157         struct dma_client client;
 158         spinlock_t lock;
 159         cpumask_t channel_mask;
 160         struct dma_chan *channels[NR_CPUS];
 161 };
 162
 163 static enum dma_state_client
 164 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 165         enum dma_state state);
 166
 167 static struct net_dma net_dma = {
 168         .client = {
 169                 .event_callback = netdev_dma_event,
 170         },
 171 };
 172 #endif
 173
 174 /*
 175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 176  * semaphore.
 177  *
 178  * Pure readers hold dev_base_lock for reading.
 179  *
 180  * Writers must hold the rtnl semaphore while they loop through the
 181  * dev_base_head list, and hold dev_base_lock for writing when they do the
 182  * actual updates.  This allows pure readers to access the list even
 183  * while a writer is preparing to update it.
 184  *
 185  * To put it another way, dev_base_lock is held for writing only to
 186  * protect against pure readers; the rtnl semaphore provides the
 187  * protection against other writers.
 188  *
 189  * See, for example usages, register_netdevice() and
 190  * unregister_netdevice(), which must be called with the rtnl
 191  * semaphore held.
 192  */
 193 DEFINE_RWLOCK(dev_base_lock);
 194
 195 EXPORT_SYMBOL(dev_base_lock);
 196
 197 #define NETDEV_HASHBITS 8
 198 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 199
 200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 201 {
 202         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 203         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 204 }
 205
 206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 207 {
 208         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 209 }
 210
 211 /* Device list insertion */
 212 static int list_netdevice(struct net_device *dev)
 213 {
 214         struct net *net = dev->nd_net;
 215
 216         ASSERT_RTNL();
 217
 218         write_lock_bh(&dev_base_lock);
 219         list_add_tail(&dev->dev_list, &net->dev_base_head);
 220         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 221         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 222         write_unlock_bh(&dev_base_lock);
 223         return 0;
 224 }
 225
 226 /* Device list removal */
 227 static void unlist_netdevice(struct net_device *dev)
 228 {
 229         ASSERT_RTNL();
 230
 231         /* Unlink dev from the device chain */
 232         write_lock_bh(&dev_base_lock);
 233         list_del(&dev->dev_list);
 234         hlist_del(&dev->name_hlist);
 235         hlist_del(&dev->index_hlist);
 236         write_unlock_bh(&dev_base_lock);
 237 }
 238
 239 /*
 240  *      Our notifier list
 241  */
 242
 243 static RAW_NOTIFIER_HEAD(netdev_chain);
 244
 245 /*
 246  *      Device drivers call our routines to queue packets here. We empty the
 247  *      queue in the local softnet handler.
 248  */
 249
 250 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 251
 252 extern int netdev_kobject_init(void);
 253 extern int netdev_register_kobject(struct net_device *);
 254 extern void netdev_unregister_kobject(struct net_device *);
 255
 256 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 257 /*
 258  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 259  * according to dev->type
 260  */
 261 static const unsigned short netdev_lock_type[] =
 262         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 263          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 264          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 265          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 266          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 267          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 268          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 269          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 270          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 271          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 272          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 273          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 274          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 275          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 276          ARPHRD_NONE};
 277
 278 static const char *netdev_lock_name[] =
 279         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 280          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 281          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 282          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 283          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 284          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 285          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 286          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 287          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 288          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 289          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 290          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 291          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 292          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 293          "_xmit_NONE"};
 294
 295 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 296
 297 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 298 {
 299         int i;
 300
 301         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 302                 if (netdev_lock_type[i] == dev_type)
 303                         return i;
 304         /* the last key is used by default */
 305         return ARRAY_SIZE(netdev_lock_type) - 1;
 306 }
 307
 308 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 309                                             unsigned short dev_type)
 310 {
 311         int i;
 312
 313         i = netdev_lock_pos(dev_type);
 314         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 315                                    netdev_lock_name[i]);
 316 }
 317 #else
 318 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 319                                             unsigned short dev_type)
 320 {
 321 }
 322 #endif
 323
 324 /*******************************************************************************
 325
 326                 Protocol management and registration routines
 327
 328 *******************************************************************************/
 329
 330 /*
 331  *      Add a protocol ID to the list. Now that the input handler is
 332  *      smarter we can dispense with all the messy stuff that used to be
 333  *      here.
 334  *
 335  *      BEWARE!!! Protocol handlers, mangling input packets,
 336  *      MUST BE last in hash buckets and checking protocol handlers
 337  *      MUST start from promiscuous ptype_all chain in net_bh.
 338  *      It is true now, do not change it.
 339  *      Explanation follows: if protocol handler, mangling packet, will
 340  *      be the first on list, it is not able to sense, that packet
 341  *      is cloned and should be copied-on-write, so that it will
 342  *      change it and subsequent readers will get broken packet.
 343  *                                                      --ANK (980803)
 344  */
 345
 346 /**
 347  *      dev_add_pack - add packet handler
 348  *      @pt: packet type declaration
 349  *
 350  *      Add a protocol handler to the networking stack. The passed &packet_type
 351  *      is linked into kernel lists and may not be freed until it has been
 352  *      removed from the kernel lists.
 353  *
 354  *      This call does not sleep therefore it can not
 355  *      guarantee all CPU's that are in middle of receiving packets
 356  *      will see the new packet type (until the next received packet).
 357  */
 358
 359 void dev_add_pack(struct packet_type *pt)
 360 {
 361         int hash;
 362
 363         spin_lock_bh(&ptype_lock);
 364         if (pt->type == htons(ETH_P_ALL))
 365                 list_add_rcu(&pt->list, &ptype_all);
 366         else {
 367                 hash = ntohs(pt->type) & 15;
 368                 list_add_rcu(&pt->list, &ptype_base[hash]);
 369         }
 370         spin_unlock_bh(&ptype_lock);
 371 }
 372
 373 /**
 374  *      __dev_remove_pack        - remove packet handler
 375  *      @pt: packet type declaration
 376  *
 377  *      Remove a protocol handler that was previously added to the kernel
 378  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 379  *      from the kernel lists and can be freed or reused once this function
 380  *      returns.
 381  *
 382  *      The packet type might still be in use by receivers
 383  *      and must not be freed until after all the CPU's have gone
 384  *      through a quiescent state.
 385  */
 386 void __dev_remove_pack(struct packet_type *pt)
 387 {
 388         struct list_head *head;
 389         struct packet_type *pt1;
 390
 391         spin_lock_bh(&ptype_lock);
 392
 393         if (pt->type == htons(ETH_P_ALL))
 394                 head = &ptype_all;
 395         else
 396                 head = &ptype_base[ntohs(pt->type) & 15];
 397
 398         list_for_each_entry(pt1, head, list) {
 399                 if (pt == pt1) {
 400                         list_del_rcu(&pt->list);
 401                         goto out;
 402                 }
 403         }
 404
 405         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 406 out:
 407         spin_unlock_bh(&ptype_lock);
 408 }
 409 /**
 410  *      dev_remove_pack  - remove packet handler
 411  *      @pt: packet type declaration
 412  *
 413  *      Remove a protocol handler that was previously added to the kernel
 414  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 415  *      from the kernel lists and can be freed or reused once this function
 416  *      returns.
 417  *
 418  *      This call sleeps to guarantee that no CPU is looking at the packet
 419  *      type after return.
 420  */
 421 void dev_remove_pack(struct packet_type *pt)
 422 {
 423         __dev_remove_pack(pt);
 424
 425         synchronize_net();
 426 }
 427
 428 /******************************************************************************
 429
 430                       Device Boot-time Settings Routines
 431
 432 *******************************************************************************/
 433
 434 /* Boot time configuration table */
 435 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 436
 437 /**
 438  *      netdev_boot_setup_add   - add new setup entry
 439  *      @name: name of the device
 440  *      @map: configured settings for the device
 441  *
 442  *      Adds new setup entry to the dev_boot_setup list.  The function
 443  *      returns 0 on error and 1 on success.  This is a generic routine to
 444  *      all netdevices.
 445  */
 446 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 447 {
 448         struct netdev_boot_setup *s;
 449         int i;
 450
 451         s = dev_boot_setup;
 452         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 453                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 454                         memset(s[i].name, 0, sizeof(s[i].name));
 455                         strcpy(s[i].name, name);
 456                         memcpy(&s[i].map, map, sizeof(s[i].map));
 457                         break;
 458                 }
 459         }
 460
 461         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 462 }
 463
 464 /**
 465  *      netdev_boot_setup_check - check boot time settings
 466  *      @dev: the netdevice
 467  *
 468  *      Check boot time settings for the device.
 469  *      The found settings are set for the device to be used
 470  *      later in the device probing.
 471  *      Returns 0 if no settings found, 1 if they are.
 472  */
 473 int netdev_boot_setup_check(struct net_device *dev)
 474 {
 475         struct netdev_boot_setup *s = dev_boot_setup;
 476         int i;
 477
 478         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 479                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 480                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 481                         dev->irq        = s[i].map.irq;
 482                         dev->base_addr  = s[i].map.base_addr;
 483                         dev->mem_start  = s[i].map.mem_start;
 484                         dev->mem_end    = s[i].map.mem_end;
 485                         return 1;
 486                 }
 487         }
 488         return 0;
 489 }
 490
 491
 492 /**
 493  *      netdev_boot_base        - get address from boot time settings
 494  *      @prefix: prefix for network device
 495  *      @unit: id for network device
 496  *
 497  *      Check boot time settings for the base address of device.
 498  *      The found settings are set for the device to be used
 499  *      later in the device probing.
 500  *      Returns 0 if no settings found.
 501  */
 502 unsigned long netdev_boot_base(const char *prefix, int unit)
 503 {
 504         const struct netdev_boot_setup *s = dev_boot_setup;
 505         char name[IFNAMSIZ];
 506         int i;
 507
 508         sprintf(name, "%s%d", prefix, unit);
 509
 510         /*
 511          * If device already registered then return base of 1
 512          * to indicate not to probe for this interface
 513          */
 514         if (__dev_get_by_name(&init_net, name))
 515                 return 1;
 516
 517         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 518                 if (!strcmp(name, s[i].name))
 519                         return s[i].map.base_addr;
 520         return 0;
 521 }
 522
 523 /*
 524  * Saves at boot time configured settings for any netdevice.
 525  */
 526 int __init netdev_boot_setup(char *str)
 527 {
 528         int ints[5];
 529         struct ifmap map;
 530
 531         str = get_options(str, ARRAY_SIZE(ints), ints);
 532         if (!str || !*str)
 533                 return 0;
 534
 535         /* Save settings */
 536         memset(&map, 0, sizeof(map));
 537         if (ints[0] > 0)
 538                 map.irq = ints[1];
 539         if (ints[0] > 1)
 540                 map.base_addr = ints[2];
 541         if (ints[0] > 2)
 542                 map.mem_start = ints[3];
 543         if (ints[0] > 3)
 544                 map.mem_end = ints[4];
 545
 546         /* Add new entry to the list */
 547         return netdev_boot_setup_add(str, &map);
 548 }
 549
 550 __setup("netdev=", netdev_boot_setup);
 551
 552 /*******************************************************************************
 553
 554                             Device Interface Subroutines
 555
 556 *******************************************************************************/
 557
 558 /**
 559  *      __dev_get_by_name       - find a device by its name
 560  *      @name: name to find
 561  *
 562  *      Find an interface by name. Must be called under RTNL semaphore
 563  *      or @dev_base_lock. If the name is found a pointer to the device
 564  *      is returned. If the name is not found then %NULL is returned. The
 565  *      reference counters are not incremented so the caller must be
 566  *      careful with locks.
 567  */
 568
 569 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 570 {
 571         struct hlist_node *p;
 572
 573         hlist_for_each(p, dev_name_hash(net, name)) {
 574                 struct net_device *dev
 575                         = hlist_entry(p, struct net_device, name_hlist);
 576                 if (!strncmp(dev->name, name, IFNAMSIZ))
 577                         return dev;
 578         }
 579         return NULL;
 580 }
 581
 582 /**
 583  *      dev_get_by_name         - find a device by its name
 584  *      @name: name to find
 585  *
 586  *      Find an interface by name. This can be called from any
 587  *      context and does its own locking. The returned handle has
 588  *      the usage count incremented and the caller must use dev_put() to
 589  *      release it when it is no longer needed. %NULL is returned if no
 590  *      matching device is found.
 591  */
 592
 593 struct net_device *dev_get_by_name(struct net *net, const char *name)
 594 {
 595         struct net_device *dev;
 596
 597         read_lock(&dev_base_lock);
 598         dev = __dev_get_by_name(net, name);
 599         if (dev)
 600                 dev_hold(dev);
 601         read_unlock(&dev_base_lock);
 602         return dev;
 603 }
 604
 605 /**
 606  *      __dev_get_by_index - find a device by its ifindex
 607  *      @ifindex: index of device
 608  *
 609  *      Search for an interface by index. Returns %NULL if the device
 610  *      is not found or a pointer to the device. The device has not
 611  *      had its reference counter increased so the caller must be careful
 612  *      about locking. The caller must hold either the RTNL semaphore
 613  *      or @dev_base_lock.
 614  */
 615
 616 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 617 {
 618         struct hlist_node *p;
 619
 620         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 621                 struct net_device *dev
 622                         = hlist_entry(p, struct net_device, index_hlist);
 623                 if (dev->ifindex == ifindex)
 624                         return dev;
 625         }
 626         return NULL;
 627 }
 628
 629
 630 /**
 631  *      dev_get_by_index - find a device by its ifindex
 632  *      @ifindex: index of device
 633  *
 634  *      Search for an interface by index. Returns NULL if the device
 635  *      is not found or a pointer to the device. The device returned has
 636  *      had a reference added and the pointer is safe until the user calls
 637  *      dev_put to indicate they have finished with it.
 638  */
 639
 640 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 641 {
 642         struct net_device *dev;
 643
 644         read_lock(&dev_base_lock);
 645         dev = __dev_get_by_index(net, ifindex);
 646         if (dev)
 647                 dev_hold(dev);
 648         read_unlock(&dev_base_lock);
 649         return dev;
 650 }
 651
 652 /**
 653  *      dev_getbyhwaddr - find a device by its hardware address
 654  *      @type: media type of device
 655  *      @ha: hardware address
 656  *
 657  *      Search for an interface by MAC address. Returns NULL if the device
 658  *      is not found or a pointer to the device. The caller must hold the
 659  *      rtnl semaphore. The returned device has not had its ref count increased
 660  *      and the caller must therefore be careful about locking
 661  *
 662  *      BUGS:
 663  *      If the API was consistent this would be __dev_get_by_hwaddr
 664  */
 665
 666 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 667 {
 668         struct net_device *dev;
 669
 670         ASSERT_RTNL();
 671
 672         for_each_netdev(&init_net, dev)
 673                 if (dev->type == type &&
 674                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 675                         return dev;
 676
 677         return NULL;
 678 }
 679
 680 EXPORT_SYMBOL(dev_getbyhwaddr);
 681
 682 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 683 {
 684         struct net_device *dev;
 685
 686         ASSERT_RTNL();
 687         for_each_netdev(net, dev)
 688                 if (dev->type == type)
 689                         return dev;
 690
 691         return NULL;
 692 }
 693
 694 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 695
 696 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 697 {
 698         struct net_device *dev;
 699
 700         rtnl_lock();
 701         dev = __dev_getfirstbyhwtype(net, type);
 702         if (dev)
 703                 dev_hold(dev);
 704         rtnl_unlock();
 705         return dev;
 706 }
 707
 708 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 709
 710 /**
 711  *      dev_get_by_flags - find any device with given flags
 712  *      @if_flags: IFF_* values
 713  *      @mask: bitmask of bits in if_flags to check
 714  *
 715  *      Search for any interface with the given flags. Returns NULL if a device
 716  *      is not found or a pointer to the device. The device returned has
 717  *      had a reference added and the pointer is safe until the user calls
 718  *      dev_put to indicate they have finished with it.
 719  */
 720
 721 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 722 {
 723         struct net_device *dev, *ret;
 724
 725         ret = NULL;
 726         read_lock(&dev_base_lock);
 727         for_each_netdev(net, dev) {
 728                 if (((dev->flags ^ if_flags) & mask) == 0) {
 729                         dev_hold(dev);
 730                         ret = dev;
 731                         break;
 732                 }
 733         }
 734         read_unlock(&dev_base_lock);
 735         return ret;
 736 }
 737
 738 /**
 739  *      dev_valid_name - check if name is okay for network device
 740  *      @name: name string
 741  *
 742  *      Network device names need to be valid file names to
 743  *      to allow sysfs to work.  We also disallow any kind of
 744  *      whitespace.
 745  */
 746 int dev_valid_name(const char *name)
 747 {
 748         if (*name == '\0')
 749                 return 0;
 750         if (strlen(name) >= IFNAMSIZ)
 751                 return 0;
 752         if (!strcmp(name, ".") || !strcmp(name, ".."))
 753                 return 0;
 754
 755         while (*name) {
 756                 if (*name == '/' || isspace(*name))
 757                         return 0;
 758                 name++;
 759         }
 760         return 1;
 761 }
 762
 763 /**
 764  *      __dev_alloc_name - allocate a name for a device
 765  *      @net: network namespace to allocate the device name in
 766  *      @name: name format string
 767  *      @buf:  scratch buffer and result name string
 768  *
 769  *      Passed a format string - eg "lt%d" it will try and find a suitable
 770  *      id. It scans list of devices to build up a free map, then chooses
 771  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 772  *      while allocating the name and adding the device in order to avoid
 773  *      duplicates.
 774  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 775  *      Returns the number of the unit assigned or a negative errno code.
 776  */
 777
 778 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 779 {
 780         int i = 0;
 781         const char *p;
 782         const int max_netdevices = 8*PAGE_SIZE;
 783         long *inuse;
 784         struct net_device *d;
 785
 786         p = strnchr(name, IFNAMSIZ-1, '%');
 787         if (p) {
 788                 /*
 789                  * Verify the string as this thing may have come from
 790                  * the user.  There must be either one "%d" and no other "%"
 791                  * characters.
 792                  */
 793                 if (p[1] != 'd' || strchr(p + 2, '%'))
 794                         return -EINVAL;
 795
 796                 /* Use one page as a bit array of possible slots */
 797                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 798                 if (!inuse)
 799                         return -ENOMEM;
 800
 801                 for_each_netdev(net, d) {
 802                         if (!sscanf(d->name, name, &i))
 803                                 continue;
 804                         if (i < 0 || i >= max_netdevices)
 805                                 continue;
 806
 807                         /*  avoid cases where sscanf is not exact inverse of printf */
 808                         snprintf(buf, IFNAMSIZ, name, i);
 809                         if (!strncmp(buf, d->name, IFNAMSIZ))
 810                                 set_bit(i, inuse);
 811                 }
 812
 813                 i = find_first_zero_bit(inuse, max_netdevices);
 814                 free_page((unsigned long) inuse);
 815         }
 816
 817         snprintf(buf, IFNAMSIZ, name, i);
 818         if (!__dev_get_by_name(net, buf))
 819                 return i;
 820
 821         /* It is possible to run out of possible slots
 822          * when the name is long and there isn't enough space left
 823          * for the digits, or if all bits are used.
 824          */
 825         return -ENFILE;
 826 }
 827
 828 /**
 829  *      dev_alloc_name - allocate a name for a device
 830  *      @dev: device
 831  *      @name: name format string
 832  *
 833  *      Passed a format string - eg "lt%d" it will try and find a suitable
 834  *      id. It scans list of devices to build up a free map, then chooses
 835  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 836  *      while allocating the name and adding the device in order to avoid
 837  *      duplicates.
 838  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 839  *      Returns the number of the unit assigned or a negative errno code.
 840  */
 841
 842 int dev_alloc_name(struct net_device *dev, const char *name)
 843 {
 844         char buf[IFNAMSIZ];
 845         struct net *net;
 846         int ret;
 847
 848         BUG_ON(!dev->nd_net);
 849         net = dev->nd_net;
 850         ret = __dev_alloc_name(net, name, buf);
 851         if (ret >= 0)
 852                 strlcpy(dev->name, buf, IFNAMSIZ);
 853         return ret;
 854 }
 855
 856
 857 /**
 858  *      dev_change_name - change name of a device
 859  *      @dev: device
 860  *      @newname: name (or format string) must be at least IFNAMSIZ
 861  *
 862  *      Change name of a device, can pass format strings "eth%d".
 863  *      for wildcarding.
 864  */
 865 int dev_change_name(struct net_device *dev, char *newname)
 866 {
 867         char oldname[IFNAMSIZ];
 868         int err = 0;
 869         int ret;
 870         struct net *net;
 871
 872         ASSERT_RTNL();
 873         BUG_ON(!dev->nd_net);
 874
 875         net = dev->nd_net;
 876         if (dev->flags & IFF_UP)
 877                 return -EBUSY;
 878
 879         if (!dev_valid_name(newname))
 880                 return -EINVAL;
 881
 882         memcpy(oldname, dev->name, IFNAMSIZ);
 883
 884         if (strchr(newname, '%')) {
 885                 err = dev_alloc_name(dev, newname);
 886                 if (err < 0)
 887                         return err;
 888                 strcpy(newname, dev->name);
 889         }
 890         else if (__dev_get_by_name(net, newname))
 891                 return -EEXIST;
 892         else
 893                 strlcpy(dev->name, newname, IFNAMSIZ);
 894
 895 rollback:
 896         device_rename(&dev->dev, dev->name);
 897
 898         write_lock_bh(&dev_base_lock);
 899         hlist_del(&dev->name_hlist);
 900         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 901         write_unlock_bh(&dev_base_lock);
 902
 903         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 904         ret = notifier_to_errno(ret);
 905
 906         if (ret) {
 907                 if (err) {
 908                         printk(KERN_ERR
 909                                "%s: name change rollback failed: %d.\n",
 910                                dev->name, ret);
 911                 } else {
 912                         err = ret;
 913                         memcpy(dev->name, oldname, IFNAMSIZ);
 914                         goto rollback;
 915                 }
 916         }
 917
 918         return err;
 919 }
 920
 921 /**
 922  *      netdev_features_change - device changes features
 923  *      @dev: device to cause notification
 924  *
 925  *      Called to indicate a device has changed features.
 926  */
 927 void netdev_features_change(struct net_device *dev)
 928 {
 929         call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
 930 }
 931 EXPORT_SYMBOL(netdev_features_change);
 932
 933 /**
 934  *      netdev_state_change - device changes state
 935  *      @dev: device to cause notification
 936  *
 937  *      Called to indicate a device has changed state. This function calls
 938  *      the notifier chains for netdev_chain and sends a NEWLINK message
 939  *      to the routing socket.
 940  */
 941 void netdev_state_change(struct net_device *dev)
 942 {
 943         if (dev->flags & IFF_UP) {
 944                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
 945                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 946         }
 947 }
 948
 949 /**
 950  *      dev_load        - load a network module
 951  *      @name: name of interface
 952  *
 953  *      If a network interface is not present and the process has suitable
 954  *      privileges this function loads the module. If module loading is not
 955  *      available in this kernel then it becomes a nop.
 956  */
 957
 958 void dev_load(struct net *net, const char *name)
 959 {
 960         struct net_device *dev;
 961
 962         read_lock(&dev_base_lock);
 963         dev = __dev_get_by_name(net, name);
 964         read_unlock(&dev_base_lock);
 965
 966         if (!dev && capable(CAP_SYS_MODULE))
 967                 request_module("%s", name);
 968 }
 969
 970 static int default_rebuild_header(struct sk_buff *skb)
 971 {
 972         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 973                skb->dev ? skb->dev->name : "NULL!!!");
 974         kfree_skb(skb);
 975         return 1;
 976 }
 977
 978 /**
 979  *      dev_open        - prepare an interface for use.
 980  *      @dev:   device to open
 981  *
 982  *      Takes a device from down to up state. The device's private open
 983  *      function is invoked and then the multicast lists are loaded. Finally
 984  *      the device is moved into the up state and a %NETDEV_UP message is
 985  *      sent to the netdev notifier chain.
 986  *
 987  *      Calling this function on an active interface is a nop. On a failure
 988  *      a negative errno code is returned.
 989  */
 990 int dev_open(struct net_device *dev)
 991 {
 992         int ret = 0;
 993
 994         /*
 995          *      Is it already up?
 996          */
 997
 998         if (dev->flags & IFF_UP)
 999                 return 0;
1000
1001         /*
1002          *      Is it even present?
1003          */
1004         if (!netif_device_present(dev))
1005                 return -ENODEV;
1006
1007         /*
1008          *      Call device private open method
1009          */
1010         set_bit(__LINK_STATE_START, &dev->state);
1011         if (dev->open) {
1012                 ret = dev->open(dev);
1013                 if (ret)
1014                         clear_bit(__LINK_STATE_START, &dev->state);
1015         }
1016
1017         /*
1018          *      If it went open OK then:
1019          */
1020
1021         if (!ret) {
1022                 /*
1023                  *      Set the flags.
1024                  */
1025                 dev->flags |= IFF_UP;
1026
1027                 /*
1028                  *      Initialize multicasting status
1029                  */
1030                 dev_set_rx_mode(dev);
1031
1032                 /*
1033                  *      Wakeup transmit queue engine
1034                  */
1035                 dev_activate(dev);
1036
1037                 /*
1038                  *      ... and announce new interface.
1039                  */
1040                 call_netdevice_notifiers(NETDEV_UP, dev);
1041         }
1042         return ret;
1043 }
1044
1045 /**
1046  *      dev_close - shutdown an interface.
1047  *      @dev: device to shutdown
1048  *
1049  *      This function moves an active device into down state. A
1050  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1051  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1052  *      chain.
1053  */
1054 int dev_close(struct net_device *dev)
1055 {
1056         might_sleep();
1057
1058         if (!(dev->flags & IFF_UP))
1059                 return 0;
1060
1061         /*
1062          *      Tell people we are going down, so that they can
1063          *      prepare to death, when device is still operating.
1064          */
1065         call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1066
1067         dev_deactivate(dev);
1068
1069         clear_bit(__LINK_STATE_START, &dev->state);
1070
1071         /* Synchronize to scheduled poll. We cannot touch poll list,
1072          * it can be even on different cpu. So just clear netif_running().
1073          *
1074          * dev->stop() will invoke napi_disable() on all of it's
1075          * napi_struct instances on this device.
1076          */
1077         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1078
1079         /*
1080          *      Call the device specific close. This cannot fail.
1081          *      Only if device is UP
1082          *
1083          *      We allow it to be called even after a DETACH hot-plug
1084          *      event.
1085          */
1086         if (dev->stop)
1087                 dev->stop(dev);
1088
1089         /*
1090          *      Device is now down.
1091          */
1092
1093         dev->flags &= ~IFF_UP;
1094
1095         /*
1096          * Tell people we are down
1097          */
1098         call_netdevice_notifiers(NETDEV_DOWN, dev);
1099
1100         return 0;
1101 }
1102
1103
/*
 *	Starts out as 1.  While it is non-zero, register_netdevice_notifier()
 *	links the notifier but does not replay REGISTER/UP events to it.
 *	NOTE(review): presumably cleared once boot-time device init is
 *	complete -- confirm against the initialisation code.
 */
static int dev_boot_phase = 1;
1105
1106 /*
1107  *      Device change register/unregister. These are not inline or static
1108  *      as we export them to the world.
1109  */
1110
1111 /**
1112  *      register_netdevice_notifier - register a network notifier block
1113  *      @nb: notifier
1114  *
1115  *      Register a notifier to be called when network device events occur.
1116  *      The notifier passed is linked into the kernel structures and must
1117  *      not be reused until it has been unregistered. A negative errno code
1118  *      is returned on a failure.
1119  *
1120  *      When registered all registration and up events are replayed
1121  *      to the new notifier to allow device to have a race free
1122  *      view of the network device list.
1123  */
1124
1125 int register_netdevice_notifier(struct notifier_block *nb)
1126 {
1127         struct net_device *dev;
1128         struct net_device *last;
1129         struct net *net;
1130         int err;
1131
1132         rtnl_lock();
1133         err = raw_notifier_chain_register(&netdev_chain, nb);
1134         if (err)
1135                 goto unlock;
1136         if (dev_boot_phase)
1137                 goto unlock;
1138         for_each_net(net) {
1139                 for_each_netdev(net, dev) {
1140                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1141                         err = notifier_to_errno(err);
1142                         if (err)
1143                                 goto rollback;
1144
1145                         if (!(dev->flags & IFF_UP))
1146                                 continue;
1147
1148                         nb->notifier_call(nb, NETDEV_UP, dev);
1149                 }
1150         }
1151
1152 unlock:
1153         rtnl_unlock();
1154         return err;
1155
1156 rollback:
1157         last = dev;
1158         for_each_net(net) {
1159                 for_each_netdev(net, dev) {
1160                         if (dev == last)
1161                                 break;
1162
1163                         if (dev->flags & IFF_UP) {
1164                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1165                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1166                         }
1167                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1168                 }
1169         }
1170         goto unlock;
1171 }
1172
1173 /**
1174  *      unregister_netdevice_notifier - unregister a network notifier block
1175  *      @nb: notifier
1176  *
1177  *      Unregister a notifier previously registered by
1178  *      register_netdevice_notifier(). The notifier is unlinked into the
1179  *      kernel structures and may then be reused. A negative errno code
1180  *      is returned on a failure.
1181  */
1182
1183 int unregister_netdevice_notifier(struct notifier_block *nb)
1184 {
1185         int err;
1186
1187         rtnl_lock();
1188         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1189         rtnl_unlock();
1190         return err;
1191 }
1192
1193 /**
1194  *      call_netdevice_notifiers - call all network notifier blocks
1195  *      @val: value passed unmodified to notifier function
1196  *      @v:   pointer passed unmodified to notifier function
1197  *
1198  *      Call all network notifier blocks.  Parameters and return value
1199  *      are as for raw_notifier_call_chain().
1200  */
1201
1202 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1203 {
1204         return raw_notifier_call_chain(&netdev_chain, val, dev);
1205 }
1206
1207 /* When > 0 there are consumers of rx skb time stamps */
1208 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1209
1210 void net_enable_timestamp(void)
1211 {
1212         atomic_inc(&netstamp_needed);
1213 }
1214
1215 void net_disable_timestamp(void)
1216 {
1217         atomic_dec(&netstamp_needed);
1218 }
1219
1220 static inline void net_timestamp(struct sk_buff *skb)
1221 {
1222         if (atomic_read(&netstamp_needed))
1223                 __net_timestamp(skb);
1224         else
1225                 skb->tstamp.tv64 = 0;
1226 }
1227
1228 /*
1229  *      Support routine. Sends outgoing frames to any network
1230  *      taps currently in use.
1231  */
1232
1233 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1234 {
1235         struct packet_type *ptype;
1236
1237         net_timestamp(skb);
1238
1239         rcu_read_lock();
1240         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1241                 /* Never send packets back to the socket
1242                  * they originated from - MvS (miquels@drinkel.ow.org)
1243                  */
1244                 if ((ptype->dev == dev || !ptype->dev) &&
1245                     (ptype->af_packet_priv == NULL ||
1246                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1247                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1248                         if (!skb2)
1249                                 break;
1250
1251                         /* skb->nh should be correctly
1252                            set by sender, so that the second statement is
1253                            just protection against buggy protocols.
1254                          */
1255                         skb_reset_mac_header(skb2);
1256
1257                         if (skb_network_header(skb2) < skb2->data ||
1258                             skb2->network_header > skb2->tail) {
1259                                 if (net_ratelimit())
1260                                         printk(KERN_CRIT "protocol %04x is "
1261                                                "buggy, dev %s\n",
1262                                                skb2->protocol, dev->name);
1263                                 skb_reset_network_header(skb2);
1264                         }
1265
1266                         skb2->transport_header = skb2->network_header;
1267                         skb2->pkt_type = PACKET_OUTGOING;
1268                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1269                 }
1270         }
1271         rcu_read_unlock();
1272 }
1273
1274
1275 void __netif_schedule(struct net_device *dev)
1276 {
1277         if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1278                 unsigned long flags;
1279                 struct softnet_data *sd;
1280
1281                 local_irq_save(flags);
1282                 sd = &__get_cpu_var(softnet_data);
1283                 dev->next_sched = sd->output_queue;
1284                 sd->output_queue = dev;
1285                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1286                 local_irq_restore(flags);
1287         }
1288 }
1289 EXPORT_SYMBOL(__netif_schedule);
1290
1291 void dev_kfree_skb_irq(struct sk_buff *skb)
1292 {
1293         if (atomic_dec_and_test(&skb->users)) {
1294                 struct softnet_data *sd;
1295                 unsigned long flags;
1296
1297                 local_irq_save(flags);
1298                 sd = &__get_cpu_var(softnet_data);
1299                 skb->next = sd->completion_queue;
1300                 sd->completion_queue = skb;
1301                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1302                 local_irq_restore(flags);
1303         }
1304 }
1305 EXPORT_SYMBOL(dev_kfree_skb_irq);
1306
/*
 * Free @skb from any context: uses dev_kfree_skb() when neither in hard
 * interrupt context nor running with interrupts disabled, and
 * dev_kfree_skb_irq() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled())
		dev_kfree_skb(skb);
	else
		dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1315
1316
1317 /**
1318  * netif_device_detach - mark device as removed
1319  * @dev: network device
1320  *
1321  * Mark device as removed from system and therefore no longer available.
1322  */
1323 void netif_device_detach(struct net_device *dev)
1324 {
1325         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1326             netif_running(dev)) {
1327                 netif_stop_queue(dev);
1328         }
1329 }
1330 EXPORT_SYMBOL(netif_device_detach);
1331
1332 /**
1333  * netif_device_attach - mark device as attached
1334  * @dev: network device
1335  *
1336  * Mark device as attached from system and restart if needed.
1337  */
1338 void netif_device_attach(struct net_device *dev)
1339 {
1340         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1341             netif_running(dev)) {
1342                 netif_wake_queue(dev);
1343                 __netdev_watchdog_up(dev);
1344         }
1345 }
1346 EXPORT_SYMBOL(netif_device_attach);
1347
1348
1349 /*
1350  * Invalidate hardware checksum when packet is to be mangled, and
1351  * complete checksum manually on outgoing path.
1352  */
1353 int skb_checksum_help(struct sk_buff *skb)
1354 {
1355         __wsum csum;
1356         int ret = 0, offset;
1357
1358         if (skb->ip_summed == CHECKSUM_COMPLETE)
1359                 goto out_set_summed;
1360
1361         if (unlikely(skb_shinfo(skb)->gso_size)) {
1362                 /* Let GSO fix up the checksum. */
1363                 goto out_set_summed;
1364         }
1365
1366         if (skb_cloned(skb)) {
1367                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1368                 if (ret)
1369                         goto out;
1370         }
1371
1372         offset = skb->csum_start - skb_headroom(skb);
1373         BUG_ON(offset > (int)skb->len);
1374         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1375
1376         offset = skb_headlen(skb) - offset;
1377         BUG_ON(offset <= 0);
1378         BUG_ON(skb->csum_offset + 2 > offset);
1379
1380         *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1381                 csum_fold(csum);
1382 out_set_summed:
1383         skb->ip_summed = CHECKSUM_NONE;
1384 out:
1385         return ret;
1386 }
1387
1388 /**
1389  *      skb_gso_segment - Perform segmentation on skb.
1390  *      @skb: buffer to segment
1391  *      @features: features for the output path (see dev->features)
1392  *
1393  *      This function segments the given skb and returns a list of segments.
1394  *
1395  *      It may return NULL if the skb requires no segmentation.  This is
1396  *      only possible when GSO is used for verifying header integrity.
1397  */
1398 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1399 {
1400         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1401         struct packet_type *ptype;
1402         __be16 type = skb->protocol;
1403         int err;
1404
1405         BUG_ON(skb_shinfo(skb)->frag_list);
1406
1407         skb_reset_mac_header(skb);
1408         skb->mac_len = skb->network_header - skb->mac_header;
1409         __skb_pull(skb, skb->mac_len);
1410
1411         if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1412                 if (skb_header_cloned(skb) &&
1413                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1414                         return ERR_PTR(err);
1415         }
1416
1417         rcu_read_lock();
1418         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1419                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1420                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1421                                 err = ptype->gso_send_check(skb);
1422                                 segs = ERR_PTR(err);
1423                                 if (err || skb_gso_ok(skb, features))
1424                                         break;
1425                                 __skb_push(skb, (skb->data -
1426                                                  skb_network_header(skb)));
1427                         }
1428                         segs = ptype->gso_segment(skb, features);
1429                         break;
1430                 }
1431         }
1432         rcu_read_unlock();
1433
1434         __skb_push(skb, skb->data - skb_mac_header(skb));
1435
1436         return segs;
1437 }
1438
1439 EXPORT_SYMBOL(skb_gso_segment);
1440
/*
 * Take action when hardware reception checksum errors are detected:
 * subject to net_ratelimit(), log the device name ("<unknown>" when
 * @dev is NULL) and dump the stack.  Only built with CONFIG_BUG.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
		dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1453
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise.  Without CONFIG_HIGHMEM
 * the answer is always 0.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
				return 1;
		}
	}

#endif
	return 0;
}
1474
/*
 * Private data kept in skb->cb while a GSO segment list hangs off
 * skb->next: the skb's original destructor, saved by dev_gso_segment()
 * when it installs dev_gso_skb_destructor in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View skb->cb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1480
1481 static void dev_gso_skb_destructor(struct sk_buff *skb)
1482 {
1483         struct dev_gso_cb *cb;
1484
1485         do {
1486                 struct sk_buff *nskb = skb->next;
1487
1488                 skb->next = nskb->next;
1489                 nskb->next = NULL;
1490                 kfree_skb(nskb);
1491         } while (skb->next);
1492
1493         cb = DEV_GSO_CB(skb);
1494         if (cb->destructor)
1495                 cb->destructor(skb);
1496 }
1497
1498 /**
1499  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1500  *      @skb: buffer to segment
1501  *
1502  *      This function segments the given skb and stores the list of segments
1503  *      in skb->next.
1504  */
1505 static int dev_gso_segment(struct sk_buff *skb)
1506 {
1507         struct net_device *dev = skb->dev;
1508         struct sk_buff *segs;
1509         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1510                                          NETIF_F_SG : 0);
1511
1512         segs = skb_gso_segment(skb, features);
1513
1514         /* Verifying header integrity only. */
1515         if (!segs)
1516                 return 0;
1517
1518         if (unlikely(IS_ERR(segs)))
1519                 return PTR_ERR(segs);
1520
1521         skb->next = segs;
1522         DEV_GSO_CB(skb)->destructor = skb->destructor;
1523         skb->destructor = dev_gso_skb_destructor;
1524
1525         return 0;
1526 }
1527
1528 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1529 {
1530         if (likely(!skb->next)) {
1531                 if (!list_empty(&ptype_all))
1532                         dev_queue_xmit_nit(skb, dev);
1533
1534                 if (netif_needs_gso(dev, skb)) {
1535                         if (unlikely(dev_gso_segment(skb)))
1536                                 goto out_kfree_skb;
1537                         if (skb->next)
1538                                 goto gso;
1539                 }
1540
1541                 return dev->hard_start_xmit(skb, dev);
1542         }
1543
1544 gso:
1545         do {
1546                 struct sk_buff *nskb = skb->next;
1547                 int rc;
1548
1549                 skb->next = nskb->next;
1550                 nskb->next = NULL;
1551                 rc = dev->hard_start_xmit(nskb, dev);
1552                 if (unlikely(rc)) {
1553                         nskb->next = skb->next;
1554                         skb->next = nskb;
1555                         return rc;
1556                 }
1557                 if (unlikely((netif_queue_stopped(dev) ||
1558                              netif_subqueue_stopped(dev, skb->queue_mapping)) &&
1559                              skb->next))
1560                         return NETDEV_TX_BUSY;
1561         } while (skb->next);
1562
1563         skb->destructor = DEV_GSO_CB(skb)->destructor;
1564
1565 out_kfree_skb:
1566         kfree_skb(skb);
1567         return 0;
1568 }
1569
1570 /**
1571  *      dev_queue_xmit - transmit a buffer
1572  *      @skb: buffer to transmit
1573  *
1574  *      Queue a buffer for transmission to a network device. The caller must
1575  *      have set the device and priority and built the buffer before calling
1576  *      this function. The function can be called from an interrupt.
1577  *
1578  *      A negative errno code is returned on a failure. A success does not
1579  *      guarantee the frame will be transmitted as it may be dropped due
1580  *      to congestion or traffic shaping.
1581  *
1582  * -----------------------------------------------------------------------------------
1583  *      I notice this method can also return errors from the queue disciplines,
1584  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1585  *      be positive.
1586  *
1587  *      Regardless of the return value, the skb is consumed, so it is currently
1588  *      difficult to retry a send to this method.  (You can bump the ref count
1589  *      before sending to hold a reference for retry if you are careful.)
1590  *
1591  *      When calling this method, interrupts MUST be enabled.  This is because
1592  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1593  *          --BLG
1594  */
1595
1596 int dev_queue_xmit(struct sk_buff *skb)
1597 {
1598         struct net_device *dev = skb->dev;
1599         struct Qdisc *q;
1600         int rc = -ENOMEM;
1601
1602         /* GSO will handle the following emulations directly. */
1603         if (netif_needs_gso(dev, skb))
1604                 goto gso;
1605
1606         if (skb_shinfo(skb)->frag_list &&
1607             !(dev->features & NETIF_F_FRAGLIST) &&
1608             __skb_linearize(skb))
1609                 goto out_kfree_skb;
1610
1611         /* Fragmented skb is linearized if device does not support SG,
1612          * or if at least one of fragments is in highmem and device
1613          * does not support DMA from it.
1614          */
1615         if (skb_shinfo(skb)->nr_frags &&
1616             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1617             __skb_linearize(skb))
1618                 goto out_kfree_skb;
1619
1620         /* If packet is not checksummed and device does not support
1621          * checksumming for this protocol, complete checksumming here.
1622          */
1623         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1624                 skb_set_transport_header(skb, skb->csum_start -
1625                                               skb_headroom(skb));
1626
1627                 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1628                     !((dev->features & NETIF_F_IP_CSUM) &&
1629                       skb->protocol == htons(ETH_P_IP)) &&
1630                     !((dev->features & NETIF_F_IPV6_CSUM) &&
1631                       skb->protocol == htons(ETH_P_IPV6)))
1632                         if (skb_checksum_help(skb))
1633                                 goto out_kfree_skb;
1634         }
1635
1636 gso:
1637         spin_lock_prefetch(&dev->queue_lock);
1638
1639         /* Disable soft irqs for various locks below. Also
1640          * stops preemption for RCU.
1641          */
1642         rcu_read_lock_bh();
1643
1644         /* Updates of qdisc are serialized by queue_lock.
1645          * The struct Qdisc which is pointed to by qdisc is now a
1646          * rcu structure - it may be accessed without acquiring
1647          * a lock (but the structure may be stale.) The freeing of the
1648          * qdisc will be deferred until it's known that there are no
1649          * more references to it.
1650          *
1651          * If the qdisc has an enqueue function, we still need to
1652          * hold the queue_lock before calling it, since queue_lock
1653          * also serializes access to the device queue.
1654          */
1655
1656         q = rcu_dereference(dev->qdisc);
1657 #ifdef CONFIG_NET_CLS_ACT
1658         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1659 #endif
1660         if (q->enqueue) {
1661                 /* Grab device queue */
1662                 spin_lock(&dev->queue_lock);
1663                 q = dev->qdisc;
1664                 if (q->enqueue) {
1665                         /* reset queue_mapping to zero */
1666                         skb->queue_mapping = 0;
1667                         rc = q->enqueue(skb, q);
1668                         qdisc_run(dev);
1669                         spin_unlock(&dev->queue_lock);
1670
1671                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1672                         goto out;
1673                 }
1674                 spin_unlock(&dev->queue_lock);
1675         }
1676
1677         /* The device has no queue. Common case for software devices:
1678            loopback, all the sorts of tunnels...
1679
1680            Really, it is unlikely that netif_tx_lock protection is necessary
1681            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1682            counters.)
1683            However, it is possible, that they rely on protection
1684            made by us here.
1685
1686            Check this and shot the lock. It is not prone from deadlocks.
1687            Either shot noqueue qdisc, it is even simpler 8)
1688          */
1689         if (dev->flags & IFF_UP) {
1690                 int cpu = smp_processor_id(); /* ok because BHs are off */
1691
1692                 if (dev->xmit_lock_owner != cpu) {
1693
1694                         HARD_TX_LOCK(dev, cpu);
1695
1696                         if (!netif_queue_stopped(dev) &&
1697                             !netif_subqueue_stopped(dev, skb->queue_mapping)) {
1698                                 rc = 0;
1699                                 if (!dev_hard_start_xmit(skb, dev)) {
1700                                         HARD_TX_UNLOCK(dev);
1701                                         goto out;
1702                                 }
1703                         }
1704                         HARD_TX_UNLOCK(dev);
1705                         if (net_ratelimit())
1706                                 printk(KERN_CRIT "Virtual device %s asks to "
1707                                        "queue packet!\n", dev->name);
1708                 } else {
1709                         /* Recursion is detected! It is possible,
1710                          * unfortunately */
1711                         if (net_ratelimit())
1712                                 printk(KERN_CRIT "Dead loop on virtual device "
1713                                        "%s, fix it urgently!\n", dev->name);
1714                 }
1715         }
1716
1717         rc = -ENETDOWN;
1718         rcu_read_unlock_bh();
1719
1720 out_kfree_skb:
1721         kfree_skb(skb);
1722         return rc;
1723 out:
1724         rcu_read_unlock_bh();
1725         return rc;
1726 }
1727
1728
1729 /*=======================================================================
1730                         Receiver routines
1731   =======================================================================*/
1732
/* Queue-length limit checked by netif_rx() before it enqueues a packet. */
int netdev_max_backlog __read_mostly = 1000;
/* Initial work budget of one net_rx_action() invocation. */
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-CPU receive counters; reported by softnet_seq_show(). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1738
1739
1740 /**
1741  *      netif_rx        -       post buffer to the network code
1742  *      @skb: buffer to post
1743  *
1744  *      This function receives a packet from a device driver and queues it for
1745  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1746  *      may be dropped during processing for congestion control or by the
1747  *      protocol layers.
1748  *
1749  *      return values:
1750  *      NET_RX_SUCCESS  (no congestion)
1751  *      NET_RX_CN_LOW   (low congestion)
1752  *      NET_RX_CN_MOD   (moderate congestion)
1753  *      NET_RX_CN_HIGH  (high congestion)
1754  *      NET_RX_DROP     (packet was dropped)
1755  *
1756  */
1757
1758 int netif_rx(struct sk_buff *skb)
1759 {
1760         struct softnet_data *queue;
1761         unsigned long flags;
1762
1763         /* if netpoll wants it, pretend we never saw it */
1764         if (netpoll_rx(skb))
1765                 return NET_RX_DROP;
1766
1767         if (!skb->tstamp.tv64)
1768                 net_timestamp(skb);
1769
1770         /*
1771          * The code is rearranged so that the path is the most
1772          * short when CPU is congested, but is still operating.
1773          */
1774         local_irq_save(flags);
1775         queue = &__get_cpu_var(softnet_data);
1776
1777         __get_cpu_var(netdev_rx_stat).total++;
1778         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1779                 if (queue->input_pkt_queue.qlen) {
1780 enqueue:
1781                         dev_hold(skb->dev);
1782                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1783                         local_irq_restore(flags);
1784                         return NET_RX_SUCCESS;
1785                 }
1786
1787                 napi_schedule(&queue->backlog);
1788                 goto enqueue;
1789         }
1790
1791         __get_cpu_var(netdev_rx_stat).dropped++;
1792         local_irq_restore(flags);
1793
1794         kfree_skb(skb);
1795         return NET_RX_DROP;
1796 }
1797
/*
 *	Same as netif_rx(), but with preemption disabled around the call
 *	and any softirq that is pending afterwards run immediately via
 *	do_softirq().  Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();
	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1812
1813 static inline struct net_device *skb_bond(struct sk_buff *skb)
1814 {
1815         struct net_device *dev = skb->dev;
1816
1817         if (dev->master) {
1818                 if (skb_bond_should_drop(skb)) {
1819                         kfree_skb(skb);
1820                         return NULL;
1821                 }
1822                 skb->dev = dev->master;
1823         }
1824
1825         return dev;
1826 }
1827
1828
1829 static void net_tx_action(struct softirq_action *h)
1830 {
1831         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1832
1833         if (sd->completion_queue) {
1834                 struct sk_buff *clist;
1835
1836                 local_irq_disable();
1837                 clist = sd->completion_queue;
1838                 sd->completion_queue = NULL;
1839                 local_irq_enable();
1840
1841                 while (clist) {
1842                         struct sk_buff *skb = clist;
1843                         clist = clist->next;
1844
1845                         BUG_TRAP(!atomic_read(&skb->users));
1846                         __kfree_skb(skb);
1847                 }
1848         }
1849
1850         if (sd->output_queue) {
1851                 struct net_device *head;
1852
1853                 local_irq_disable();
1854                 head = sd->output_queue;
1855                 sd->output_queue = NULL;
1856                 local_irq_enable();
1857
1858                 while (head) {
1859                         struct net_device *dev = head;
1860                         head = head->next_sched;
1861
1862                         smp_mb__before_clear_bit();
1863                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1864
1865                         if (spin_trylock(&dev->queue_lock)) {
1866                                 qdisc_run(dev);
1867                                 spin_unlock(&dev->queue_lock);
1868                         } else {
1869                                 netif_schedule(dev);
1870                         }
1871                 }
1872         }
1873 }
1874
1875 static inline int deliver_skb(struct sk_buff *skb,
1876                               struct packet_type *pt_prev,
1877                               struct net_device *orig_dev)
1878 {
1879         atomic_inc(&skb->users);
1880         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1881 }
1882
1883 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1884 /* These hooks defined here for ATM */
1885 struct net_bridge;
1886 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1887                                                 unsigned char *addr);
1888 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1889
1890 /*
1891  * If bridge module is loaded call bridging hook.
1892  *  returns NULL if packet was consumed.
1893  */
1894 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1895                                         struct sk_buff *skb) __read_mostly;
1896 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1897                                             struct packet_type **pt_prev, int *ret,
1898                                             struct net_device *orig_dev)
1899 {
1900         struct net_bridge_port *port;
1901
1902         if (skb->pkt_type == PACKET_LOOPBACK ||
1903             (port = rcu_dereference(skb->dev->br_port)) == NULL)
1904                 return skb;
1905
1906         if (*pt_prev) {
1907                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1908                 *pt_prev = NULL;
1909         }
1910
1911         return br_handle_frame_hook(port, skb);
1912 }
1913 #else
1914 #define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
1915 #endif
1916
1917 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
1918 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1919 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1920
1921 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1922                                              struct packet_type **pt_prev,
1923                                              int *ret,
1924                                              struct net_device *orig_dev)
1925 {
1926         if (skb->dev->macvlan_port == NULL)
1927                 return skb;
1928
1929         if (*pt_prev) {
1930                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1931                 *pt_prev = NULL;
1932         }
1933         return macvlan_handle_frame_hook(skb);
1934 }
1935 #else
1936 #define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
1937 #endif
1938
1939 #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
 * instructions (a compare and 2 extra stores right now) if we don't
 * have it on but do have CONFIG_NET_CLS_ACT.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
1948 static int ing_filter(struct sk_buff *skb)
1949 {
1950         struct Qdisc *q;
1951         struct net_device *dev = skb->dev;
1952         int result = TC_ACT_OK;
1953
1954         if (dev->qdisc_ingress) {
1955                 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1956                 if (MAX_RED_LOOP < ttl++) {
1957                         printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1958                                 skb->iif, skb->dev->ifindex);
1959                         return TC_ACT_SHOT;
1960                 }
1961
1962                 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1963
1964                 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1965
1966                 spin_lock(&dev->ingress_lock);
1967                 if ((q = dev->qdisc_ingress) != NULL)
1968                         result = q->enqueue(skb, q);
1969                 spin_unlock(&dev->ingress_lock);
1970
1971         }
1972
1973         return result;
1974 }
1975 #endif
1976
/*
 * Deliver one received packet to the registered protocol handlers.
 *
 * Handlers are called through a "pt_prev" deferral: each matching
 * handler is remembered and only invoked (via deliver_skb(), which
 * takes an extra skb reference) once the next match is found.  The
 * last match is called directly at the end, without the extra
 * reference.  If nothing matches, the skb is freed here.
 *
 * Returns NET_RX_DROP when netpoll or skb_bond() takes the packet or no
 * handler matched; otherwise the value left in ret by the last step
 * that set it.
 */
int netif_receive_skb(struct sk_buff *skb)
{
        struct packet_type *ptype, *pt_prev;
        struct net_device *orig_dev;
        int ret = NET_RX_DROP;
        __be16 type;

        /* if we've gotten here through NAPI, check netpoll */
        if (netpoll_receive_skb(skb))
                return NET_RX_DROP;

        if (!skb->tstamp.tv64)
                net_timestamp(skb);

        if (!skb->iif)
                skb->iif = skb->dev->ifindex;

        /* May switch skb->dev to the master; frees skb and returns NULL on drop. */
        orig_dev = skb_bond(skb);

        if (!orig_dev)
                return NET_RX_DROP;

        __get_cpu_var(netdev_rx_stat).total++;

        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;

        pt_prev = NULL;

        rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
        /* TC_NCLS set: clear it and skip the ptype_all taps and ingress filter. */
        if (skb->tc_verd & TC_NCLS) {
                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
                goto ncls;
        }
#endif

        /* First pass: handlers on ptype_all, optionally bound to this device. */
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

#ifdef CONFIG_NET_CLS_ACT
        if (pt_prev) {
                ret = deliver_skb(skb, pt_prev, orig_dev);
                pt_prev = NULL; /* no one else should process this afterwards */
        } else {
                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
        }

        /*
         * NOTE(review): ret now holds a TC_ACT_* code, and that is what is
         * returned on the drop path below -- confirm no caller interprets
         * it as a NET_RX_* value.
         */
        ret = ing_filter(skb);

        if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
                kfree_skb(skb);
                goto out;
        }

        skb->tc_verd = 0;
ncls:
#endif

        /* Either hook may return NULL, in which case processing stops here. */
        skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
        if (!skb)
                goto out;
        skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
        if (!skb)
                goto out;

        /* Second pass: handlers hashed by protocol (low 4 bits of the type). */
        type = skb->protocol;
        list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
                if (ptype->type == type &&
                    (!ptype->dev || ptype->dev == skb->dev)) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

        if (pt_prev) {
                /* Last handler is called directly, without an extra skb reference. */
                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not able to escape explaining
                 * me how you were going to use this. :-)
                 */
                ret = NET_RX_DROP;
        }

out:
        rcu_read_unlock();
        return ret;
}
2074
2075 static int process_backlog(struct napi_struct *napi, int quota)
2076 {
2077         int work = 0;
2078         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2079         unsigned long start_time = jiffies;
2080
2081         napi->weight = weight_p;
2082         do {
2083                 struct sk_buff *skb;
2084                 struct net_device *dev;
2085
2086                 local_irq_disable();
2087                 skb = __skb_dequeue(&queue->input_pkt_queue);
2088                 if (!skb) {
2089                         __napi_complete(napi);
2090                         local_irq_enable();
2091                         break;
2092                 }
2093
2094                 local_irq_enable();
2095
2096                 dev = skb->dev;
2097
2098                 netif_receive_skb(skb);
2099
2100                 dev_put(dev);
2101         } while (++work < quota && jiffies == start_time);
2102
2103         return work;
2104 }
2105
2106 /**
2107  * __napi_schedule - schedule for receive
2108  * @napi: entry to schedule
2109  *
2110  * The entry's receive function will be scheduled to run
2111  */
2112 void fastcall __napi_schedule(struct napi_struct *n)
2113 {
2114         unsigned long flags;
2115
2116         local_irq_save(flags);
2117         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2118         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2119         local_irq_restore(flags);
2120 }
2121 EXPORT_SYMBOL(__napi_schedule);
2122
2123
2124 static void net_rx_action(struct softirq_action *h)
2125 {
2126         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2127         unsigned long start_time = jiffies;
2128         int budget = netdev_budget;
2129         void *have;
2130
2131         local_irq_disable();
2132
2133         while (!list_empty(list)) {
2134                 struct napi_struct *n;
2135                 int work, weight;
2136
2137                 /* If softirq window is exhuasted then punt.
2138                  *
2139                  * Note that this is a slight policy change from the
2140                  * previous NAPI code, which would allow up to 2
2141                  * jiffies to pass before breaking out.  The test
2142                  * used to be "jiffies - start_time > 1".
2143                  */
2144                 if (unlikely(budget <= 0 || jiffies != start_time))
2145                         goto softnet_break;
2146
2147                 local_irq_enable();
2148
2149                 /* Even though interrupts have been re-enabled, this
2150                  * access is safe because interrupts can only add new
2151                  * entries to the tail of this list, and only ->poll()
2152                  * calls can remove this head entry from the list.
2153                  */
2154                 n = list_entry(list->next, struct napi_struct, poll_list);
2155
2156                 have = netpoll_poll_lock(n);
2157
2158                 weight = n->weight;
2159
2160                 work = n->poll(n, weight);
2161
2162                 WARN_ON_ONCE(work > weight);
2163
2164                 budget -= work;
2165
2166                 local_irq_disable();
2167
2168                 /* Drivers must not modify the NAPI state if they
2169                  * consume the entire weight.  In such cases this code
2170                  * still "owns" the NAPI instance and therefore can
2171                  * move the instance around on the list at-will.
2172                  */
2173                 if (unlikely(work == weight))
2174                         list_move_tail(&n->poll_list, list);
2175
2176                 netpoll_poll_unlock(have);
2177         }
2178 out:
2179         local_irq_enable();
2180
2181 #ifdef CONFIG_NET_DMA
2182         /*
2183          * There may not be any more sk_buffs coming right now, so push
2184          * any pending DMA copies to hardware
2185          */
2186         if (!cpus_empty(net_dma.channel_mask)) {
2187                 int chan_idx;
2188                 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2189                         struct dma_chan *chan = net_dma.channels[chan_idx];
2190                         if (chan)
2191                                 dma_async_memcpy_issue_pending(chan);
2192                 }
2193         }
2194 #endif
2195
2196         return;
2197
2198 softnet_break:
2199         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2200         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2201         goto out;
2202 }
2203
2204 static gifconf_func_t * gifconf_list [NPROTO];
2205
2206 /**
2207  *      register_gifconf        -       register a SIOCGIF handler
2208  *      @family: Address family
2209  *      @gifconf: Function handler
2210  *
2211  *      Register protocol dependent address dumping routines. The handler
2212  *      that is passed must not be freed or reused until it has been replaced
2213  *      by another handler.
2214  */
2215 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2216 {
2217         if (family >= NPROTO)
2218                 return -EINVAL;
2219         gifconf_list[family] = gifconf;
2220         return 0;
2221 }
2222
2223
2224 /*
2225  *      Map an interface index to its name (SIOCGIFNAME)
2226  */
2227
2228 /*
2229  *      We need this ioctl for efficient implementation of the
2230  *      if_indextoname() function required by the IPv6 API.  Without
2231  *      it, we would have to search all the interfaces to find a
2232  *      match.  --pb
2233  */
2234
2235 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2236 {
2237         struct net_device *dev;
2238         struct ifreq ifr;
2239
2240         /*
2241          *      Fetch the caller's info block.
2242          */
2243
2244         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2245                 return -EFAULT;
2246
2247         read_lock(&dev_base_lock);
2248         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2249         if (!dev) {
2250                 read_unlock(&dev_base_lock);
2251                 return -ENODEV;
2252         }
2253
2254         strcpy(ifr.ifr_name, dev->name);
2255         read_unlock(&dev_base_lock);
2256
2257         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2258                 return -EFAULT;
2259         return 0;
2260 }
2261
2262 /*
2263  *      Perform a SIOCGIFCONF call. This structure will change
2264  *      size eventually, and there is nothing I can do about it.
2265  *      Thus we will need a 'compatibility mode'.
2266  */
2267
2268 static int dev_ifconf(struct net *net, char __user *arg)
2269 {
2270         struct ifconf ifc;
2271         struct net_device *dev;
2272         char __user *pos;
2273         int len;
2274         int total;
2275         int i;
2276
2277         /*
2278          *      Fetch the caller's info block.
2279          */
2280
2281         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2282                 return -EFAULT;
2283
2284         pos = ifc.ifc_buf;
2285         len = ifc.ifc_len;
2286
2287         /*
2288          *      Loop over the interfaces, and write an info block for each.
2289          */
2290
2291         total = 0;
2292         for_each_netdev(net, dev) {
2293                 for (i = 0; i < NPROTO; i++) {
2294                         if (gifconf_list[i]) {
2295                                 int done;
2296                                 if (!pos)
2297                                         done = gifconf_list[i](dev, NULL, 0);
2298                                 else
2299                                         done = gifconf_list[i](dev, pos + total,
2300                                                                len - total);
2301                                 if (done < 0)
2302                                         return -EFAULT;
2303                                 total += done;
2304                         }
2305                 }
2306         }
2307
2308         /*
2309          *      All done.  Write the updated control block back to the caller.
2310          */
2311         ifc.ifc_len = total;
2312
2313         /*
2314          *      Both BSD and Solaris return 0 here, so we do too.
2315          */
2316         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2317 }
2318
2319 #ifdef CONFIG_PROC_FS
2320 /*
2321  *      This is invoked by the /proc filesystem handler to display a device
2322  *      in detail.
2323  */
2324 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2325 {
2326         struct net *net = seq->private;
2327         loff_t off;
2328         struct net_device *dev;
2329
2330         read_lock(&dev_base_lock);
2331         if (!*pos)
2332                 return SEQ_START_TOKEN;
2333
2334         off = 1;
2335         for_each_netdev(net, dev)
2336                 if (off++ == *pos)
2337                         return dev;
2338
2339         return NULL;
2340 }
2341
2342 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2343 {
2344         struct net *net = seq->private;
2345         ++*pos;
2346         return v == SEQ_START_TOKEN ?
2347                 first_net_device(net) : next_net_device((struct net_device *)v);
2348 }
2349
/* seq_file ->stop: release the dev_base_lock taken in dev_seq_start(). */
void dev_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&dev_base_lock);
}
2354
2355 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2356 {
2357         struct net_device_stats *stats = dev->get_stats(dev);
2358
2359         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2360                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2361                    dev->name, stats->rx_bytes, stats->rx_packets,
2362                    stats->rx_errors,
2363                    stats->rx_dropped + stats->rx_missed_errors,
2364                    stats->rx_fifo_errors,
2365                    stats->rx_length_errors + stats->rx_over_errors +
2366                     stats->rx_crc_errors + stats->rx_frame_errors,
2367                    stats->rx_compressed, stats->multicast,
2368                    stats->tx_bytes, stats->tx_packets,
2369                    stats->tx_errors, stats->tx_dropped,
2370                    stats->tx_fifo_errors, stats->collisions,
2371                    stats->tx_carrier_errors +
2372                     stats->tx_aborted_errors +
2373                     stats->tx_window_errors +
2374                     stats->tx_heartbeat_errors,
2375                    stats->tx_compressed);
2376 }
2377
2378 /*
2379  *      Called from the PROCfs module. This now uses the new arbitrary sized
2380  *      /proc/net interface to create /proc/net/dev
2381  */
2382 static int dev_seq_show(struct seq_file *seq, void *v)
2383 {
2384         if (v == SEQ_START_TOKEN)
2385                 seq_puts(seq, "Inter-|   Receive                            "
2386                               "                    |  Transmit\n"
2387                               " face |bytes    packets errs drop fifo frame "
2388                               "compressed multicast|bytes    packets errs "
2389                               "drop fifo colls carrier compressed\n");
2390         else
2391                 dev_seq_printf_stats(seq, v);
2392         return 0;
2393 }
2394
2395 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2396 {
2397         struct netif_rx_stats *rc = NULL;
2398
2399         while (*pos < NR_CPUS)
2400                 if (cpu_online(*pos)) {
2401                         rc = &per_cpu(netdev_rx_stat, *pos);
2402                         break;
2403                 } else
2404                         ++*pos;
2405         return rc;
2406 }
2407
/* seq_file ->start: stats of the first online CPU at or after *pos. */
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
        return softnet_get_online(pos);
}
2412
/* seq_file ->next: step past the current CPU and find the next online one. */
static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return softnet_get_online(pos);
}
2418
/* seq_file ->stop: nothing to do, softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2422
2423 static int softnet_seq_show(struct seq_file *seq, void *v)
2424 {
2425         struct netif_rx_stats *s = v;
2426
2427         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2428                    s->total, s->dropped, s->time_squeeze, 0,
2429                    0, 0, 0, 0, /* was fastroute */
2430                    s->cpu_collision );
2431         return 0;
2432 }
2433
/* seq_file iterator for the device statistics table (see dev_seq_open()). */
static const struct seq_operations dev_seq_ops = {
        .start = dev_seq_start,
        .next  = dev_seq_next,
        .stop  = dev_seq_stop,
        .show  = dev_seq_show,
};
2440
2441 static int dev_seq_open(struct inode *inode, struct file *file)
2442 {
2443         struct seq_file *seq;
2444         int res;
2445         res =  seq_open(file, &dev_seq_ops);
2446         if (!res) {
2447                 seq = file->private_data;
2448                 seq->private = get_proc_net(inode);
2449                 if (!seq->private) {
2450                         seq_release(inode, file);
2451                         res = -ENXIO;
2452                 }
2453         }
2454         return res;
2455 }
2456
2457 static int dev_seq_release(struct inode *inode, struct file *file)
2458 {
2459         struct seq_file *seq = file->private_data;
2460         struct net *net = seq->private;
2461         put_net(net);
2462         return seq_release(inode, file);
2463 }
2464
/* File operations registered for the "dev" proc entry in dev_proc_net_init(). */
static const struct file_operations dev_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = dev_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = dev_seq_release,
};
2472
/* seq_file iterator over the per-CPU receive statistics of online CPUs. */
static const struct seq_operations softnet_seq_ops = {
        .start = softnet_seq_start,
        .next  = softnet_seq_next,
        .stop  = softnet_seq_stop,
        .show  = softnet_seq_show,
};
2479
/* Open handler: attach the softnet_seq_ops iterator to the file. */
static int softnet_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &softnet_seq_ops);
}
2484
/* File operations registered for the "softnet_stat" proc entry. */
static const struct file_operations softnet_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = softnet_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2492
2493 static void *ptype_get_idx(loff_t pos)
2494 {
2495         struct packet_type *pt = NULL;
2496         loff_t i = 0;
2497         int t;
2498
2499         list_for_each_entry_rcu(pt, &ptype_all, list) {
2500                 if (i == pos)
2501                         return pt;
2502                 ++i;
2503         }
2504
2505         for (t = 0; t < 16; t++) {
2506                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2507                         if (i == pos)
2508                                 return pt;
2509                         ++i;
2510                 }
2511         }
2512         return NULL;
2513 }
2514
2515 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2516 {
2517         rcu_read_lock();
2518         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2519 }
2520
2521 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2522 {
2523         struct packet_type *pt;
2524         struct list_head *nxt;
2525         int hash;
2526
2527         ++*pos;
2528         if (v == SEQ_START_TOKEN)
2529                 return ptype_get_idx(0);
2530
2531         pt = v;
2532         nxt = pt->list.next;
2533         if (pt->type == htons(ETH_P_ALL)) {
2534                 if (nxt != &ptype_all)
2535                         goto found;
2536                 hash = 0;
2537                 nxt = ptype_base[0].next;
2538         } else
2539                 hash = ntohs(pt->type) & 15;
2540
2541         while (nxt == &ptype_base[hash]) {
2542                 if (++hash >= 16)
2543                         return NULL;
2544                 nxt = ptype_base[hash].next;
2545         }
2546 found:
2547         return list_entry(nxt, struct packet_type, list);
2548 }
2549
/* seq_file ->stop: leave the RCU read-side section entered in ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
        rcu_read_unlock();
}
2554
/*
 * Print the address @sym.  With CONFIG_KALLSYMS and a successful lookup
 * the output is "symbol+0xoffset", wrapped as ":module:symbol+0xoffset"
 * when a module name is known; otherwise the raw pointer is printed in
 * brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		const char *sep = ":";
		const char *mod = modname;

		/* No module: print neither the name nor the separators. */
		if (!mod) {
			sep = "";
			mod = "";
		}
		seq_printf(seq, "%s%s%s%s+0x%lx", sep, mod, sep,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2579
2580 static int ptype_seq_show(struct seq_file *seq, void *v)
2581 {
2582         struct packet_type *pt = v;
2583
2584         if (v == SEQ_START_TOKEN)
2585                 seq_puts(seq, "Type Device      Function\n");
2586         else {
2587                 if (pt->type == htons(ETH_P_ALL))
2588                         seq_puts(seq, "ALL ");
2589                 else
2590                         seq_printf(seq, "%04x", ntohs(pt->type));
2591
2592                 seq_printf(seq, " %-8s ",
2593                            pt->dev ? pt->dev->name : "");
2594                 ptype_seq_decode(seq,  pt->func);
2595                 seq_putc(seq, '\n');
2596         }
2597
2598         return 0;
2599 }
2600
/* seq_file iterator over the registered packet types. */
static const struct seq_operations ptype_seq_ops = {
        .start = ptype_seq_start,
        .next  = ptype_seq_next,
        .stop  = ptype_seq_stop,
        .show  = ptype_seq_show,
};
2607
/* Open handler: attach the ptype_seq_ops iterator to the file. */
static int ptype_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &ptype_seq_ops);
}
2612
/* File operations registered for the "ptype" proc entry. */
static const struct file_operations ptype_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = ptype_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2620
2621
2622 static int dev_proc_net_init(struct net *net)
2623 {
2624         int rc = -ENOMEM;
2625
2626         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2627                 goto out;
2628         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2629                 goto out_dev;
2630         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2631                 goto out_softnet;
2632
2633         if (wext_proc_init(net))
2634                 goto out_ptype;
2635         rc = 0;
2636 out:
2637         return rc;
2638 out_ptype:
2639         proc_net_remove(net, "ptype");
2640 out_softnet:
2641         proc_net_remove(net, "softnet_stat");
2642 out_dev:
2643         proc_net_remove(net, "dev");
2644         goto out;
2645 }
2646
/*
 * Per-namespace teardown: undo dev_proc_net_init() in reverse order,
 * wireless-extensions proc support first, then the three proc entries.
 */
static void dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}
2655
2656 static struct pernet_operations dev_proc_ops = {
2657         .init = dev_proc_net_init,
2658         .exit = dev_proc_net_exit,
2659 };
2660
2661 static int __init dev_proc_init(void)
2662 {
2663         return register_pernet_subsys(&dev_proc_ops);
2664 }
2665 #else
2666 #define dev_proc_init() 0
2667 #endif  /* CONFIG_PROC_FS */
2668
2669
2670 /**
2671  *      netdev_set_master       -       set up master/slave pair
2672  *      @slave: slave device
2673  *      @master: new master device
2674  *
2675  *      Changes the master device of the slave. Pass %NULL to break the
2676  *      bonding. The caller must hold the RTNL semaphore. On a failure
2677  *      a negative errno code is returned. On success the reference counts
2678  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2679  *      function returns zero.
2680  */
2681 int netdev_set_master(struct net_device *slave, struct net_device *master)
2682 {
2683         struct net_device *old = slave->master;
2684
2685         ASSERT_RTNL();
2686
2687         if (master) {
2688                 if (old)
2689                         return -EBUSY;
2690                 dev_hold(master);
2691         }
2692
2693         slave->master = master;
2694
2695         synchronize_net();
2696
2697         if (old)
2698                 dev_put(old);
2699
2700         if (master)
2701                 slave->flags |= IFF_SLAVE;
2702         else
2703                 slave->flags &= ~IFF_SLAVE;
2704
2705         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2706         return 0;
2707 }
2708
2709 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2710 {
2711         unsigned short old_flags = dev->flags;
2712
2713         ASSERT_RTNL();
2714
2715         if ((dev->promiscuity += inc) == 0)
2716                 dev->flags &= ~IFF_PROMISC;
2717         else
2718                 dev->flags |= IFF_PROMISC;
2719         if (dev->flags != old_flags) {
2720                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2721                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2722                                                                "left");
2723                 audit_log(current->audit_context, GFP_ATOMIC,
2724                         AUDIT_ANOM_PROMISCUOUS,
2725                         "dev=%s prom=%d old_prom=%d auid=%u",
2726                         dev->name, (dev->flags & IFF_PROMISC),
2727                         (old_flags & IFF_PROMISC),
2728                         audit_get_loginuid(current->audit_context));
2729
2730                 if (dev->change_rx_flags)
2731                         dev->change_rx_flags(dev, IFF_PROMISC);
2732         }
2733 }
2734
2735 /**
2736  *      dev_set_promiscuity     - update promiscuity count on a device
2737  *      @dev: device
2738  *      @inc: modifier
2739  *
2740  *      Add or remove promiscuity from a device. While the count in the device
2741  *      remains above zero the interface remains promiscuous. Once it hits zero
2742  *      the device reverts back to normal filtering operation. A negative inc
2743  *      value is used to drop promiscuity on the device.
2744  */
2745 void dev_set_promiscuity(struct net_device *dev, int inc)
2746 {
2747         unsigned short old_flags = dev->flags;
2748
2749         __dev_set_promiscuity(dev, inc);
2750         if (dev->flags != old_flags)
2751                 dev_set_rx_mode(dev);
2752 }
2753
2754 /**
2755  *      dev_set_allmulti        - update allmulti count on a device
2756  *      @dev: device
2757  *      @inc: modifier
2758  *
2759  *      Add or remove reception of all multicast frames to a device. While the
2760  *      count in the device remains above zero the interface remains listening
2761  *      to all interfaces. Once it hits zero the device reverts back to normal
2762  *      filtering operation. A negative @inc value is used to drop the counter
2763  *      when releasing a resource needing all multicasts.
2764  */
2765
2766 void dev_set_allmulti(struct net_device *dev, int inc)
2767 {
2768         unsigned short old_flags = dev->flags;
2769
2770         ASSERT_RTNL();
2771
2772         dev->flags |= IFF_ALLMULTI;
2773         if ((dev->allmulti += inc) == 0)
2774                 dev->flags &= ~IFF_ALLMULTI;
2775         if (dev->flags ^ old_flags) {
2776                 if (dev->change_rx_flags)
2777                         dev->change_rx_flags(dev, IFF_ALLMULTI);
2778                 dev_set_rx_mode(dev);
2779         }
2780 }
2781
2782 /*
2783  *      Upload unicast and multicast address lists to device and
2784  *      configure RX filtering. When the device doesn't support unicast
2785  *      filtering it is put in promiscous mode while unicast addresses
2786  *      are present.
2787  */
2788 void __dev_set_rx_mode(struct net_device *dev)
2789 {
2790         /* dev_open will call this function so the list will stay sane. */
2791         if (!(dev->flags&IFF_UP))
2792                 return;
2793
2794         if (!netif_device_present(dev))
2795                 return;
2796
2797         if (dev->set_rx_mode)
2798                 dev->set_rx_mode(dev);
2799         else {
2800                 /* Unicast addresses changes may only happen under the rtnl,
2801                  * therefore calling __dev_set_promiscuity here is safe.
2802                  */
2803                 if (dev->uc_count > 0 && !dev->uc_promisc) {
2804                         __dev_set_promiscuity(dev, 1);
2805                         dev->uc_promisc = 1;
2806                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2807                         __dev_set_promiscuity(dev, -1);
2808                         dev->uc_promisc = 0;
2809                 }
2810
2811                 if (dev->set_multicast_list)
2812                         dev->set_multicast_list(dev);
2813         }
2814 }
2815
/* Locked wrapper: run __dev_set_rx_mode() with the device TX lock held */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_tx_unlock_bh(dev);
}
2822
2823 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2824                       void *addr, int alen, int glbl)
2825 {
2826         struct dev_addr_list *da;
2827
2828         for (; (da = *list) != NULL; list = &da->next) {
2829                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2830                     alen == da->da_addrlen) {
2831                         if (glbl) {
2832                                 int old_glbl = da->da_gusers;
2833                                 da->da_gusers = 0;
2834                                 if (old_glbl == 0)
2835                                         break;
2836                         }
2837                         if (--da->da_users)
2838                                 return 0;
2839
2840                         *list = da->next;
2841                         kfree(da);
2842                         (*count)--;
2843                         return 0;
2844                 }
2845         }
2846         return -ENOENT;
2847 }
2848
2849 int __dev_addr_add(struct dev_addr_list **list, int *count,
2850                    void *addr, int alen, int glbl)
2851 {
2852         struct dev_addr_list *da;
2853
2854         for (da = *list; da != NULL; da = da->next) {
2855                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2856                     da->da_addrlen == alen) {
2857                         if (glbl) {
2858                                 int old_glbl = da->da_gusers;
2859                                 da->da_gusers = 1;
2860                                 if (old_glbl)
2861                                         return 0;
2862                         }
2863                         da->da_users++;
2864                         return 0;
2865                 }
2866         }
2867
2868         da = kmalloc(sizeof(*da), GFP_ATOMIC);
2869         if (da == NULL)
2870                 return -ENOMEM;
2871         memcpy(da->da_addr, addr, alen);
2872         da->da_addrlen = alen;
2873         da->da_users = 1;
2874         da->da_gusers = glbl ? 1 : 0;
2875         da->next = *list;
2876         *list = da;
2877         (*count)++;
2878         return 0;
2879 }
2880
2881 /**
2882  *      dev_unicast_delete      - Release secondary unicast address.
2883  *      @dev: device
2884  *      @addr: address to delete
2885  *      @alen: length of @addr
2886  *
2887  *      Release reference to a secondary unicast address and remove it
2888  *      from the device if the reference count drops to zero.
2889  *
2890  *      The caller must hold the rtnl_mutex.
2891  */
2892 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2893 {
2894         int err;
2895
2896         ASSERT_RTNL();
2897
2898         netif_tx_lock_bh(dev);
2899         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2900         if (!err)
2901                 __dev_set_rx_mode(dev);
2902         netif_tx_unlock_bh(dev);
2903         return err;
2904 }
2905 EXPORT_SYMBOL(dev_unicast_delete);
2906
2907 /**
2908  *      dev_unicast_add         - add a secondary unicast address
2909  *      @dev: device
2910  *      @addr: address to delete
2911  *      @alen: length of @addr
2912  *
2913  *      Add a secondary unicast address to the device or increase
2914  *      the reference count if it already exists.
2915  *
2916  *      The caller must hold the rtnl_mutex.
2917  */
2918 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2919 {
2920         int err;
2921
2922         ASSERT_RTNL();
2923
2924         netif_tx_lock_bh(dev);
2925         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2926         if (!err)
2927                 __dev_set_rx_mode(dev);
2928         netif_tx_unlock_bh(dev);
2929         return err;
2930 }
2931 EXPORT_SYMBOL(dev_unicast_add);
2932
2933 static void __dev_addr_discard(struct dev_addr_list **list)
2934 {
2935         struct dev_addr_list *tmp;
2936
2937         while (*list != NULL) {
2938                 tmp = *list;
2939                 *list = tmp->next;
2940                 if (tmp->da_users > tmp->da_gusers)
2941                         printk("__dev_addr_discard: address leakage! "
2942                                "da_users=%d\n", tmp->da_users);
2943                 kfree(tmp);
2944         }
2945 }
2946
2947 static void dev_addr_discard(struct net_device *dev)
2948 {
2949         netif_tx_lock_bh(dev);
2950
2951         __dev_addr_discard(&dev->uc_list);
2952         dev->uc_count = 0;
2953
2954         __dev_addr_discard(&dev->mc_list);
2955         dev->mc_count = 0;
2956
2957         netif_tx_unlock_bh(dev);
2958 }
2959
2960 unsigned dev_get_flags(const struct net_device *dev)
2961 {
2962         unsigned flags;
2963
2964         flags = (dev->flags & ~(IFF_PROMISC |
2965                                 IFF_ALLMULTI |
2966                                 IFF_RUNNING |
2967                                 IFF_LOWER_UP |
2968                                 IFF_DORMANT)) |
2969                 (dev->gflags & (IFF_PROMISC |
2970                                 IFF_ALLMULTI));
2971
2972         if (netif_running(dev)) {
2973                 if (netif_oper_up(dev))
2974                         flags |= IFF_RUNNING;
2975                 if (netif_carrier_ok(dev))
2976                         flags |= IFF_LOWER_UP;
2977                 if (netif_dormant(dev))
2978                         flags |= IFF_DORMANT;
2979         }
2980
2981         return flags;
2982 }
2983
2984 int dev_change_flags(struct net_device *dev, unsigned flags)
2985 {
2986         int ret, changes;
2987         int old_flags = dev->flags;
2988
2989         ASSERT_RTNL();
2990
2991         /*
2992          *      Set the flags on our device.
2993          */
2994
2995         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2996                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2997                                IFF_AUTOMEDIA)) |
2998                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2999                                     IFF_ALLMULTI));
3000
3001         /*
3002          *      Load in the correct multicast list now the flags have changed.
3003          */
3004
3005         if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3006                 dev->change_rx_flags(dev, IFF_MULTICAST);
3007
3008         dev_set_rx_mode(dev);
3009
3010         /*
3011          *      Have we downed the interface. We handle IFF_UP ourselves
3012          *      according to user attempts to set it, rather than blindly
3013          *      setting it.
3014          */
3015
3016         ret = 0;
3017         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3018                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3019
3020                 if (!ret)
3021                         dev_set_rx_mode(dev);
3022         }
3023
3024         if (dev->flags & IFF_UP &&
3025             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3026                                           IFF_VOLATILE)))
3027                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
3028
3029         if ((flags ^ dev->gflags) & IFF_PROMISC) {
3030                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3031                 dev->gflags ^= IFF_PROMISC;
3032                 dev_set_promiscuity(dev, inc);
3033         }
3034
3035         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3036            is important. Some (broken) drivers set IFF_PROMISC, when
3037            IFF_ALLMULTI is requested not asking us and not reporting.
3038          */
3039         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3040                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3041                 dev->gflags ^= IFF_ALLMULTI;
3042                 dev_set_allmulti(dev, inc);
3043         }
3044
3045         /* Exclude state transition flags, already notified */
3046         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3047         if (changes)
3048                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3049
3050         return ret;
3051 }
3052
3053 int dev_set_mtu(struct net_device *dev, int new_mtu)
3054 {
3055         int err;
3056
3057         if (new_mtu == dev->mtu)
3058                 return 0;
3059
3060         /*      MTU must be positive.    */
3061         if (new_mtu < 0)
3062                 return -EINVAL;
3063
3064         if (!netif_device_present(dev))
3065                 return -ENODEV;
3066
3067         err = 0;
3068         if (dev->change_mtu)
3069                 err = dev->change_mtu(dev, new_mtu);
3070         else
3071                 dev->mtu = new_mtu;
3072         if (!err && dev->flags & IFF_UP)
3073                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3074         return err;
3075 }
3076
3077 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3078 {
3079         int err;
3080
3081         if (!dev->set_mac_address)
3082                 return -EOPNOTSUPP;
3083         if (sa->sa_family != dev->type)
3084                 return -EINVAL;
3085         if (!netif_device_present(dev))
3086                 return -ENODEV;
3087         err = dev->set_mac_address(dev, sa);
3088         if (!err)
3089                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3090         return err;
3091 }
3092
3093 /*
3094  *      Perform the SIOCxIFxxx calls.
3095  */
3096 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3097 {
3098         int err;
3099         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3100
3101         if (!dev)
3102                 return -ENODEV;
3103
3104         switch (cmd) {
3105                 case SIOCGIFFLAGS:      /* Get interface flags */
3106                         ifr->ifr_flags = dev_get_flags(dev);
3107                         return 0;
3108
3109                 case SIOCSIFFLAGS:      /* Set interface flags */
3110                         return dev_change_flags(dev, ifr->ifr_flags);
3111
3112                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3113                                            (currently unused) */
3114                         ifr->ifr_metric = 0;
3115                         return 0;
3116
3117                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3118                                            (currently unused) */
3119                         return -EOPNOTSUPP;
3120
3121                 case SIOCGIFMTU:        /* Get the MTU of a device */
3122                         ifr->ifr_mtu = dev->mtu;
3123                         return 0;
3124
3125                 case SIOCSIFMTU:        /* Set the MTU of a device */
3126                         return dev_set_mtu(dev, ifr->ifr_mtu);
3127
3128                 case SIOCGIFHWADDR:
3129                         if (!dev->addr_len)
3130                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3131                         else
3132                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3133                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3134                         ifr->ifr_hwaddr.sa_family = dev->type;
3135                         return 0;
3136
3137                 case SIOCSIFHWADDR:
3138                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3139
3140                 case SIOCSIFHWBROADCAST:
3141                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3142                                 return -EINVAL;
3143                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3144                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3145                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3146                         return 0;
3147
3148                 case SIOCGIFMAP:
3149                         ifr->ifr_map.mem_start = dev->mem_start;
3150                         ifr->ifr_map.mem_end   = dev->mem_end;
3151                         ifr->ifr_map.base_addr = dev->base_addr;
3152                         ifr->ifr_map.irq       = dev->irq;
3153                         ifr->ifr_map.dma       = dev->dma;
3154                         ifr->ifr_map.port      = dev->if_port;
3155                         return 0;
3156
3157                 case SIOCSIFMAP:
3158                         if (dev->set_config) {
3159                                 if (!netif_device_present(dev))
3160                                         return -ENODEV;
3161                                 return dev->set_config(dev, &ifr->ifr_map);
3162                         }
3163                         return -EOPNOTSUPP;
3164
3165                 case SIOCADDMULTI:
3166                         if (!dev->set_multicast_list ||
3167                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3168                                 return -EINVAL;
3169                         if (!netif_device_present(dev))
3170                                 return -ENODEV;
3171                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3172                                           dev->addr_len, 1);
3173
3174                 case SIOCDELMULTI:
3175                         if (!dev->set_multicast_list ||
3176                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3177                                 return -EINVAL;
3178                         if (!netif_device_present(dev))
3179                                 return -ENODEV;
3180                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3181                                              dev->addr_len, 1);
3182
3183                 case SIOCGIFINDEX:
3184                         ifr->ifr_ifindex = dev->ifindex;
3185                         return 0;
3186
3187                 case SIOCGIFTXQLEN:
3188                         ifr->ifr_qlen = dev->tx_queue_len;
3189                         return 0;
3190
3191                 case SIOCSIFTXQLEN:
3192                         if (ifr->ifr_qlen < 0)
3193                                 return -EINVAL;
3194                         dev->tx_queue_len = ifr->ifr_qlen;
3195                         return 0;
3196
3197                 case SIOCSIFNAME:
3198                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3199                         return dev_change_name(dev, ifr->ifr_newname);
3200
3201                 /*
3202                  *      Unknown or private ioctl
3203                  */
3204
3205                 default:
3206                         if ((cmd >= SIOCDEVPRIVATE &&
3207                             cmd <= SIOCDEVPRIVATE + 15) ||
3208                             cmd == SIOCBONDENSLAVE ||
3209                             cmd == SIOCBONDRELEASE ||
3210                             cmd == SIOCBONDSETHWADDR ||
3211                             cmd == SIOCBONDSLAVEINFOQUERY ||
3212                             cmd == SIOCBONDINFOQUERY ||
3213                             cmd == SIOCBONDCHANGEACTIVE ||
3214                             cmd == SIOCGMIIPHY ||
3215                             cmd == SIOCGMIIREG ||
3216                             cmd == SIOCSMIIREG ||
3217                             cmd == SIOCBRADDIF ||
3218                             cmd == SIOCBRDELIF ||
3219                             cmd == SIOCWANDEV) {
3220                                 err = -EOPNOTSUPP;
3221                                 if (dev->do_ioctl) {
3222                                         if (netif_device_present(dev))
3223                                                 err = dev->do_ioctl(dev, ifr,
3224                                                                     cmd);
3225                                         else
3226                                                 err = -ENODEV;
3227                                 }
3228                         } else
3229                                 err = -EINVAL;
3230
3231         }
3232         return err;
3233 }
3234
3235 /*
3236  *      This function handles all "interface"-type I/O control requests. The actual
3237  *      'doing' part of this is dev_ifsioc above.
3238  */
3239
3240 /**
3241  *      dev_ioctl       -       network device ioctl
3242  *      @cmd: command to issue
3243  *      @arg: pointer to a struct ifreq in user space
3244  *
3245  *      Issue ioctl functions to devices. This is normally called by the
3246  *      user space syscall interfaces but can sometimes be useful for
3247  *      other purposes. The return value is the return from the syscall if
3248  *      positive or a negative errno code on error.
3249  */
3250
3251 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3252 {
3253         struct ifreq ifr;
3254         int ret;
3255         char *colon;
3256
3257         /* One special case: SIOCGIFCONF takes ifconf argument
3258            and requires shared lock, because it sleeps writing
3259            to user space.
3260          */
3261
3262         if (cmd == SIOCGIFCONF) {
3263                 rtnl_lock();
3264                 ret = dev_ifconf(net, (char __user *) arg);
3265                 rtnl_unlock();
3266                 return ret;
3267         }
3268         if (cmd == SIOCGIFNAME)
3269                 return dev_ifname(net, (struct ifreq __user *)arg);
3270
3271         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3272                 return -EFAULT;
3273
3274         ifr.ifr_name[IFNAMSIZ-1] = 0;
3275
3276         colon = strchr(ifr.ifr_name, ':');
3277         if (colon)
3278                 *colon = 0;
3279
3280         /*
3281          *      See which interface the caller is talking about.
3282          */
3283
3284         switch (cmd) {
3285                 /*
3286                  *      These ioctl calls:
3287                  *      - can be done by all.
3288                  *      - atomic and do not require locking.
3289                  *      - return a value
3290                  */
3291                 case SIOCGIFFLAGS:
3292                 case SIOCGIFMETRIC:
3293                 case SIOCGIFMTU:
3294                 case SIOCGIFHWADDR:
3295                 case SIOCGIFSLAVE:
3296                 case SIOCGIFMAP:
3297                 case SIOCGIFINDEX:
3298                 case SIOCGIFTXQLEN:
3299                         dev_load(net, ifr.ifr_name);
3300                         read_lock(&dev_base_lock);
3301                         ret = dev_ifsioc(net, &ifr, cmd);
3302                         read_unlock(&dev_base_lock);
3303                         if (!ret) {
3304                                 if (colon)
3305                                         *colon = ':';
3306                                 if (copy_to_user(arg, &ifr,
3307                                                  sizeof(struct ifreq)))
3308                                         ret = -EFAULT;
3309                         }
3310                         return ret;
3311
3312                 case SIOCETHTOOL:
3313                         dev_load(net, ifr.ifr_name);
3314                         rtnl_lock();
3315                         ret = dev_ethtool(net, &ifr);
3316                         rtnl_unlock();
3317                         if (!ret) {
3318                                 if (colon)
3319                                         *colon = ':';
3320                                 if (copy_to_user(arg, &ifr,
3321                                                  sizeof(struct ifreq)))
3322                                         ret = -EFAULT;
3323                         }
3324                         return ret;
3325
3326                 /*
3327                  *      These ioctl calls:
3328                  *      - require superuser power.
3329                  *      - require strict serialization.
3330                  *      - return a value
3331                  */
3332                 case SIOCGMIIPHY:
3333                 case SIOCGMIIREG:
3334                 case SIOCSIFNAME:
3335                         if (!capable(CAP_NET_ADMIN))
3336                                 return -EPERM;
3337                         dev_load(net, ifr.ifr_name);
3338                         rtnl_lock();
3339                         ret = dev_ifsioc(net, &ifr, cmd);
3340                         rtnl_unlock();
3341                         if (!ret) {
3342                                 if (colon)
3343                                         *colon = ':';
3344                                 if (copy_to_user(arg, &ifr,
3345                                                  sizeof(struct ifreq)))
3346                                         ret = -EFAULT;
3347                         }
3348                         return ret;
3349
3350                 /*
3351                  *      These ioctl calls:
3352                  *      - require superuser power.
3353                  *      - require strict serialization.
3354                  *      - do not return a value
3355                  */
3356                 case SIOCSIFFLAGS:
3357                 case SIOCSIFMETRIC:
3358                 case SIOCSIFMTU:
3359                 case SIOCSIFMAP:
3360                 case SIOCSIFHWADDR:
3361                 case SIOCSIFSLAVE:
3362                 case SIOCADDMULTI:
3363                 case SIOCDELMULTI:
3364                 case SIOCSIFHWBROADCAST:
3365                 case SIOCSIFTXQLEN:
3366                 case SIOCSMIIREG:
3367                 case SIOCBONDENSLAVE:
3368                 case SIOCBONDRELEASE:
3369                 case SIOCBONDSETHWADDR:
3370                 case SIOCBONDCHANGEACTIVE:
3371                 case SIOCBRADDIF:
3372                 case SIOCBRDELIF:
3373                         if (!capable(CAP_NET_ADMIN))
3374                                 return -EPERM;
3375                         /* fall through */
3376                 case SIOCBONDSLAVEINFOQUERY:
3377                 case SIOCBONDINFOQUERY:
3378                         dev_load(net, ifr.ifr_name);
3379                         rtnl_lock();
3380                         ret = dev_ifsioc(net, &ifr, cmd);
3381                         rtnl_unlock();
3382                         return ret;
3383
3384                 case SIOCGIFMEM:
3385                         /* Get the per device memory space. We can add this but
3386                          * currently do not support it */
3387                 case SIOCSIFMEM:
3388                         /* Set the per device memory buffer space.
3389                          * Not applicable in our case */
3390                 case SIOCSIFLINK:
3391                         return -EINVAL;
3392
3393                 /*
3394                  *      Unknown or private ioctl.
3395                  */
3396                 default:
3397                         if (cmd == SIOCWANDEV ||
3398                             (cmd >= SIOCDEVPRIVATE &&
3399                              cmd <= SIOCDEVPRIVATE + 15)) {
3400                                 dev_load(net, ifr.ifr_name);
3401                                 rtnl_lock();
3402                                 ret = dev_ifsioc(net, &ifr, cmd);
3403                                 rtnl_unlock();
3404                                 if (!ret && copy_to_user(arg, &ifr,
3405                                                          sizeof(struct ifreq)))
3406                                         ret = -EFAULT;
3407                                 return ret;
3408                         }
3409                         /* Take care of Wireless Extensions */
3410                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3411                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
3412                         return -EINVAL;
3413         }
3414 }
3415
3416
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: network namespace to check for index collisions in
 *
 *	Returns a positive interface index that no device in @net is
 *	currently using. Candidates come from a function-static counter
 *	that restarts at 1 whenever it would become non-positive. The
 *	caller must hold the rtnl semaphore or the dev_base_lock to be
 *	sure the value remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		ifindex++;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3434
/*
 * Delayed registration/unregistration: devices queued by net_set_todo()
 * wait on net_todo_list, which is protected by net_todo_list_lock.
 */
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);
3438
/*
 * Append @dev to the tail of net_todo_list so that netdev_run_todo()
 * picks it up later.  The list is modified under net_todo_list_lock.
 */
static void net_set_todo(struct net_device *dev)
{
        spin_lock(&net_todo_list_lock);
        list_add_tail(&dev->todo_list, &net_todo_list);
        spin_unlock(&net_todo_list_lock);
}
3445
/**
 *      register_netdevice      - register a network device
 *      @dev: device to register
 *
 *      Take a completed network device structure and add it to the kernel
 *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *      chain. 0 is returned on success. A negative errno code is returned
 *      on a failure to set up the device, or if the name is a duplicate.
 *
 *      Error codes generated directly here: -EINVAL for an invalid name,
 *      -EEXIST for a name already present in the device's namespace and
 *      -EIO when dev->init() returns a positive value.  Negative results
 *      from dev->init(), failures of netdev_register_kobject() and errors
 *      reported by the notifier chain are returned as they are.
 *
 *      If dev->init() succeeded but a later step up to and including
 *      netdev_register_kobject() fails, dev->uninit() is called (when set).
 *      If a notifier rejects the device, unregister_netdevice() is called
 *      on it before the error is returned.
 *
 *      Callers must hold the rtnl semaphore. You may want
 *      register_netdev() instead of this.
 *
 *      BUGS:
 *      The locking appears insufficient to guarantee two parallel registers
 *      will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
        struct hlist_head *head;
        struct hlist_node *p;
        int ret;
        struct net *net;

        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();

        might_sleep();

        /* When net_device's are persistent, this will be fatal. */
        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
        BUG_ON(!dev->nd_net);
        net = dev->nd_net;

        /* Set up the per-device locks before anything can use them. */
        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->_xmit_lock);
        netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
        dev->xmit_lock_owner = -1;
        spin_lock_init(&dev->ingress_lock);

        /*
         * -1 marks iflink as "not set"; dev->init() may assign a real
         * value, otherwise it defaults to the device's own ifindex below.
         */
        dev->iflink = -1;

        /* Init, if this function is available */
        if (dev->init) {
                ret = dev->init(dev);
                if (ret) {
                        /* Normalise positive failure values to an errno. */
                        if (ret > 0)
                                ret = -EIO;
                        /* init failed, so uninit is deliberately skipped */
                        goto out;
                }
        }

        if (!dev_valid_name(dev->name)) {
                ret = -EINVAL;
                goto err_uninit;
        }

        dev->ifindex = dev_new_index(net);
        if (dev->iflink == -1)
                dev->iflink = dev->ifindex;

        /* Check for existence of name */
        head = dev_name_hash(net, dev->name);
        hlist_for_each(p, head) {
                struct net_device *d
                        = hlist_entry(p, struct net_device, name_hlist);
                if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
                        ret = -EEXIST;
                        goto err_uninit;
                }
        }

        /*
         * The checks below do not fail registration: each inconsistent
         * feature combination is logged and the offending bits cleared.
         */

        /* Fix illegal checksum combinations */
        if ((dev->features & NETIF_F_HW_CSUM) &&
            (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
                printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
                       dev->name);
                dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
        }

        if ((dev->features & NETIF_F_NO_CSUM) &&
            (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
                printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
                       dev->name);
                dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
        }


        /* Fix illegal SG+CSUM combinations. */
        if ((dev->features & NETIF_F_SG) &&
            !(dev->features & NETIF_F_ALL_CSUM)) {
                printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
                       dev->name);
                dev->features &= ~NETIF_F_SG;
        }

        /* TSO requires that SG is present as well. */
        if ((dev->features & NETIF_F_TSO) &&
            !(dev->features & NETIF_F_SG)) {
                printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
                       dev->name);
                dev->features &= ~NETIF_F_TSO;
        }
        /* UFO requires both NETIF_F_HW_CSUM and NETIF_F_SG. */
        if (dev->features & NETIF_F_UFO) {
                if (!(dev->features & NETIF_F_HW_CSUM)) {
                        printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
                                        "NETIF_F_HW_CSUM feature.\n",
                                                        dev->name);
                        dev->features &= ~NETIF_F_UFO;
                }
                if (!(dev->features & NETIF_F_SG)) {
                        printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
                                        "NETIF_F_SG feature.\n",
                                        dev->name);
                        dev->features &= ~NETIF_F_UFO;
                }
        }

        /*
         *      nil rebuild_header routine,
         *      that should be never called and used as just bug trap.
         */

        if (!dev->rebuild_header)
                dev->rebuild_header = default_rebuild_header;

        ret = netdev_register_kobject(dev);
        if (ret)
                goto err_uninit;
        dev->reg_state = NETREG_REGISTERED;

        /*
         *      Default initial state at registry is that the
         *      device is present.
         */

        set_bit(__LINK_STATE_PRESENT, &dev->state);

        dev_init_scheduler(dev);
        /* Take a reference on the device before adding it to the lists. */
        dev_hold(dev);
        list_netdevice(dev);

        /* Notify protocols, that a new device appeared. */
        ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
        ret = notifier_to_errno(ret);
        /* A notifier vetoed the device: undo the registration again. */
        if (ret)
                unregister_netdevice(dev);

out:
        return ret;

err_uninit:
        /* Give the driver the chance to undo what dev->init() set up. */
        if (dev->uninit)
                dev->uninit(dev);
        goto out;
}
3602
3603 /**
3604  *      register_netdev - register a network device
3605  *      @dev: device to register
3606  *
3607  *      Take a completed network device structure and add it to the kernel
3608  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3609  *      chain. 0 is returned on success. A negative errno code is returned
3610  *      on a failure to set up the device, or if the name is a duplicate.
3611  *
3612  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3613  *      and expands the device name if you passed a format string to
3614  *      alloc_netdev.
3615  */
3616 int register_netdev(struct net_device *dev)
3617 {
3618         int err;
3619
3620         rtnl_lock();
3621
3622         /*
3623          * If the name is a format string the caller wants us to do a
3624          * name allocation.
3625          */
3626         if (strchr(dev->name, '%')) {
3627                 err = dev_alloc_name(dev, dev->name);
3628                 if (err < 0)
3629                         goto out;
3630         }
3631
3632         err = register_netdevice(dev);
3633 out:
3634         rtnl_unlock();
3635         return err;
3636 }
3637 EXPORT_SYMBOL(register_netdev);
3638
/*
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: device whose reference count must drop to zero
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 *
 * The loop polls dev->refcnt, sleeping 250ms per iteration.  Once more
 * than a second has passed since the last broadcast, NETDEV_UNREGISTER
 * is sent again under the rtnl semaphore; once more than ten seconds
 * have passed since the last warning, a message with the current usage
 * count is printed.  There is no timeout: the function only returns when
 * the count reaches zero.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
        unsigned long rebroadcast_time, warning_time;

        rebroadcast_time = warning_time = jiffies;
        while (atomic_read(&dev->refcnt) != 0) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                        rtnl_lock();

                        /* Rebroadcast unregister notification */
                        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

                        if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
                                     &dev->state)) {
                                /* We must not have linkwatch events
                                 * pending on unregister. If this
                                 * happens, we simply run the queue
                                 * unscheduled, resulting in a noop
                                 * for this device.
                                 */
                                linkwatch_run_queue();
                        }

                        /*
                         * NOTE(review): __rtnl_unlock() rather than
                         * rtnl_unlock() - presumably so that the todo
                         * processing we are already part of is not
                         * entered again from here; confirm.
                         */
                        __rtnl_unlock();

                        rebroadcast_time = jiffies;
                }

                msleep(250);

                if (time_after(jiffies, warning_time + 10 * HZ)) {
                        printk(KERN_EMERG "unregister_netdevice: "
                               "waiting for %s to become free. Usage "
                               "count = %d\n",
                               dev->name, atomic_read(&dev->refcnt));
                        warning_time = jiffies;
                }
        }
}
3689
/* The sequence is:
 *
 *      rtnl_lock();
 *      ...
 *      register_netdevice(x1);
 *      register_netdevice(x2);
 *      ...
 *      unregister_netdevice(y1);
 *      unregister_netdevice(y2);
 *      ...
 *      rtnl_unlock();
 *      free_netdev(y1);
 *      free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * For every device queued by net_set_todo() this moves the state from
 * NETREG_UNREGISTERING to NETREG_UNREGISTERED, waits for all references
 * to go away, calls the device's destructor (if any) and drops the
 * kobject reference.  net_todo_run_mutex serialises concurrent runs.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
        struct list_head list;

        /* Need to guard against multiple cpu's getting out of order. */
        mutex_lock(&net_todo_run_mutex);

        /* Not safe to do outside the semaphore.  We must not return
         * until all unregister events invoked by the local processor
         * have been completed (either by this todo run, or one on
         * another cpu).
         */
        if (list_empty(&net_todo_list))
                goto out;

        /* Snapshot list, allow later requests */
        spin_lock(&net_todo_list_lock);
        list_replace_init(&net_todo_list, &list);
        spin_unlock(&net_todo_list_lock);

        /* Work through the private snapshot, one device at a time. */
        while (!list_empty(&list)) {
                struct net_device *dev
                        = list_entry(list.next, struct net_device, todo_list);
                list_del(&dev->todo_list);

                /* Only devices in the middle of unregistering belong here. */
                if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
                        printk(KERN_ERR "network todo '%s' but state %d\n",
                               dev->name, dev->reg_state);
                        dump_stack();
                        continue;
                }

                dev->reg_state = NETREG_UNREGISTERED;

                /* May sleep for a long time; see netdev_wait_allrefs(). */
                netdev_wait_allrefs(dev);

                /* paranoia */
                BUG_ON(atomic_read(&dev->refcnt));
                BUG_TRAP(!dev->ip_ptr);
                BUG_TRAP(!dev->ip6_ptr);
                BUG_TRAP(!dev->dn_ptr);

                if (dev->destructor)
                        dev->destructor(dev);

                /* Free network device */
                kobject_put(&dev->dev.kobj);
        }

out:
        mutex_unlock(&net_todo_run_mutex);
}
3764
/*
 * Default get_stats handler installed by alloc_netdev_mq(): returns the
 * address of the statistics structure embedded in @dev.
 */
static struct net_device_stats *internal_stats(struct net_device *dev)
{
        return &dev->stats;
}
3769
3770 /**
3771  *      alloc_netdev_mq - allocate network device
3772  *      @sizeof_priv:   size of private data to allocate space for
3773  *      @name:          device name format string
3774  *      @setup:         callback to initialize device
3775  *      @queue_count:   the number of subqueues to allocate
3776  *
3777  *      Allocates a struct net_device with private data area for driver use
3778  *      and performs basic initialization.  Also allocates subquue structs
3779  *      for each queue on the device at the end of the netdevice.
3780  */
3781 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3782                 void (*setup)(struct net_device *), unsigned int queue_count)
3783 {
3784         void *p;
3785         struct net_device *dev;
3786         int alloc_size;
3787
3788         BUG_ON(strlen(name) >= sizeof(dev->name));
3789
3790         /* ensure 32-byte alignment of both the device and private area */
3791         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3792                      (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3793                      ~NETDEV_ALIGN_CONST;
3794         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3795
3796         p = kzalloc(alloc_size, GFP_KERNEL);
3797         if (!p) {
3798                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3799                 return NULL;
3800         }
3801
3802         dev = (struct net_device *)
3803                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3804         dev->padded = (char *)dev - (char *)p;
3805         dev->nd_net = &init_net;
3806
3807         if (sizeof_priv) {
3808                 dev->priv = ((char *)dev +
3809                              ((sizeof(struct net_device) +
3810                                (sizeof(struct net_device_subqueue) *
3811                                 (queue_count - 1)) + NETDEV_ALIGN_CONST)
3812                               & ~NETDEV_ALIGN_CONST));
3813         }
3814
3815         dev->egress_subqueue_count = queue_count;
3816
3817         dev->get_stats = internal_stats;
3818         netpoll_netdev_init(dev);
3819         setup(dev);
3820         strcpy(dev->name, name);
3821         return dev;
3822 }
3823 EXPORT_SYMBOL(alloc_netdev_mq);
3824
3825 /**
3826  *      free_netdev - free network device
3827  *      @dev: device
3828  *
3829  *      This function does the last stage of destroying an allocated device
3830  *      interface. The reference to the device object is released.
3831  *      If this is the last reference then it will be freed.
3832  */
3833 void free_netdev(struct net_device *dev)
3834 {
3835         /*  Compatibility with error handling in drivers */
3836         if (dev->reg_state == NETREG_UNINITIALIZED) {
3837                 kfree((char *)dev - dev->padded);
3838                 return;
3839         }
3840
3841         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3842         dev->reg_state = NETREG_RELEASED;
3843
3844         /* will free via device release */
3845         put_device(&dev->dev);
3846 }
3847
/*
 * Synchronize with packet receive processing.
 *
 * Waits in synchronize_rcu(); the might_sleep() annotation documents
 * that callers must be in a context that is allowed to sleep.
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
3854
/**
 *      unregister_netdevice - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.  It does not return a value.  A device
 *      that was never registered only produces a warning; any state
 *      other than %NETREG_REGISTERED beyond that is a BUG.
 *
 *      The final teardown is deferred: the device is queued with
 *      net_set_todo() and finished by netdev_run_todo().
 *
 *      Callers must hold the rtnl semaphore.  You may want
 *      unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();

        /* Some devices call without registering for initialization unwind. */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
                                  "was registered\n", dev->name, dev);

                WARN_ON(1);
                return;
        }

        BUG_ON(dev->reg_state != NETREG_REGISTERED);

        /* If device is running, close it first. */
        if (dev->flags & IFF_UP)
                dev_close(dev);

        /* And unlink it from device chain. */
        unlist_netdevice(dev);

        dev->reg_state = NETREG_UNREGISTERING;

        synchronize_net();

        /* Shutdown queueing discipline. */
        dev_shutdown(dev);


        /* Notify protocols, that we are about to destroy
           this device. They should clean all the things.
        */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

        /*
         *      Flush the unicast and multicast chains
         */
        dev_addr_discard(dev);

        if (dev->uninit)
                dev->uninit(dev);

        /* Notifier chain MUST detach us from master device. */
        BUG_TRAP(!dev->master);

        /* Remove entries from kobject tree */
        netdev_unregister_kobject(dev);

        /* Finish processing unregister after unlock */
        net_set_todo(dev);

        synchronize_net();

        /*
         * Drop a device reference; presumably the one taken by dev_hold()
         * in register_netdevice() - confirm.
         */
        dev_put(dev);
}
3924
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.  It does not return a value.
 *
 *      This is just a wrapper for unregister_netdevice that takes
 *      the rtnl semaphore.  In general you want to use this and not
 *      unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);
        rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3945
/**
 *      dev_change_net_namespace - move device to different network namespace
 *      @dev: device
 *      @net: network namespace
 *      @pat: If not NULL name pattern to try if the current device name
 *            is already taken in the destination network namespace.
 *
 *      This function shuts down a device interface and moves it
 *      to a new network namespace. On success 0 is returned, on
 *      a failure a negative errno code is returned.
 *
 *      Returns -EINVAL if the device is marked %NETIF_F_NETNS_LOCAL or is
 *      not in the %NETREG_REGISTERED state, and -EEXIST if no usable name
 *      can be found in @net.  If the device already lives in @net nothing
 *      is done and 0 is returned.  All failures are detected before the
 *      device is touched.
 *
 *      Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
        char buf[IFNAMSIZ];
        const char *destname;
        int err;

        ASSERT_RTNL();

        /* Don't allow namespace local devices to be moved. */
        err = -EINVAL;
        if (dev->features & NETIF_F_NETNS_LOCAL)
                goto out;

        /* Ensure the device has been registered */
        err = -EINVAL;
        if (dev->reg_state != NETREG_REGISTERED)
                goto out;

        /* Get out if there is nothing to do */
        err = 0;
        if (dev->nd_net == net)
                goto out;

        /* Pick the destination device name, and ensure
         * we can use it in the destination network namespace.
         */
        err = -EEXIST;
        destname = dev->name;
        if (__dev_get_by_name(net, destname)) {
                /* We get here if we can't use the current device name */
                if (!pat)
                        goto out;
                if (!dev_valid_name(pat))
                        goto out;
                /* A '%' makes @pat a template to expand into buf. */
                if (strchr(pat, '%')) {
                        if (__dev_alloc_name(net, pat, buf) < 0)
                                goto out;
                        destname = buf;
                } else
                        destname = pat;
                /* The replacement name must be free in @net as well. */
                if (__dev_get_by_name(net, destname))
                        goto out;
        }

        /*
         * And now a mini version of register_netdevice unregister_netdevice.
         */

        /* If device is running close it first. */
        if (dev->flags & IFF_UP)
                dev_close(dev);

        /* And unlink it from device chain */
        /*
         * NOTE(review): no path returns this -ENODEV; err is overwritten
         * by device_rename() and then by 0 before the function returns.
         */
        err = -ENODEV;
        unlist_netdevice(dev);

        synchronize_net();

        /* Shutdown queueing discipline. */
        dev_shutdown(dev);

        /* Notify protocols, that we are about to destroy
           this device. They should clean all the things.
        */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

        /*
         *      Flush the unicast and multicast chains
         */
        dev_addr_discard(dev);

        /* Actually switch the network namespace */
        dev->nd_net = net;

        /* Assign the new device name */
        if (destname != dev->name)
                strcpy(dev->name, destname);

        /* If there is an ifindex conflict assign a new one */
        if (__dev_get_by_index(net, dev->ifindex)) {
                /* Remember whether iflink simply mirrored the old ifindex. */
                int iflink = (dev->iflink == dev->ifindex);
                dev->ifindex = dev_new_index(net);
                if (iflink)
                        dev->iflink = dev->ifindex;
        }

        /* Fixup kobjects */
        /* A rename failure is only warned about; the move continues. */
        err = device_rename(&dev->dev, dev->name);
        WARN_ON(err);

        /* Add the device back in the hashes */
        list_netdevice(dev);

        /* Notify protocols, that a new device appeared. */
        call_netdevice_notifiers(NETDEV_REGISTER, dev);

        synchronize_net();
        err = 0;
out:
        return err;
}
4061
/*
 * CPU notifier callback.
 *
 * Acts only on CPU_DEAD and CPU_DEAD_FROZEN; every other action is
 * ignored.  The softnet_data queues of the CPU named by @ocpu are handed
 * over to the CPU running this callback: its completion queue and output
 * queue are appended to the local ones and NET_TX_SOFTIRQ is raised,
 * then every packet left in its input queue is fed back in through
 * netif_rx().
 *
 * @nfb:    notifier block (unused)
 * @action: notifier action code
 * @ocpu:   number of the affected CPU, passed as a pointer-sized value
 *
 * Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *ocpu)
{
        struct sk_buff **list_skb;
        struct net_device **list_net;
        struct sk_buff *skb;
        unsigned int cpu, oldcpu = (unsigned long)ocpu;
        struct softnet_data *sd, *oldsd;

        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
                return NOTIFY_OK;

        /* Interrupts stay off while the local queues are being extended. */
        local_irq_disable();
        cpu = smp_processor_id();
        sd = &per_cpu(softnet_data, cpu);
        oldsd = &per_cpu(softnet_data, oldcpu);

        /* Find end of our completion_queue. */
        list_skb = &sd->completion_queue;
        while (*list_skb)
                list_skb = &(*list_skb)->next;
        /* Append completion queue from offline CPU. */
        *list_skb = oldsd->completion_queue;
        oldsd->completion_queue = NULL;

        /* Find end of our output_queue. */
        list_net = &sd->output_queue;
        while (*list_net)
                list_net = &(*list_net)->next_sched;
        /* Append output queue from offline CPU. */
        *list_net = oldsd->output_queue;
        oldsd->output_queue = NULL;

        /* Have the TX softirq process what was just appended. */
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_enable();

        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
                netif_rx(skb);

        return NOTIFY_OK;
}
4105
4106 #ifdef CONFIG_NET_DMA
4107 /**
4108  * net_dma_rebalance - try to maintain one DMA channel per CPU
4109  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4110  *
4111  * This is called when the number of channels allocated to the net_dma client
4112  * changes.  The net_dma client tries to have one DMA channel per CPU.
4113  */
4114
4115 static void net_dma_rebalance(struct net_dma *net_dma)
4116 {
4117         unsigned int cpu, i, n, chan_idx;
4118         struct dma_chan *chan;
4119
4120         if (cpus_empty(net_dma->channel_mask)) {
4121                 for_each_online_cpu(cpu)
4122                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4123                 return;
4124         }
4125
4126         i = 0;
4127         cpu = first_cpu(cpu_online_map);
4128
4129         for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4130                 chan = net_dma->channels[chan_idx];
4131
4132                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4133                    + (i < (num_online_cpus() %
4134                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4135
4136                 while(n) {
4137                         per_cpu(softnet_data, cpu).net_dma = chan;
4138                         cpu = next_cpu(cpu, cpu_online_map);
4139                         n--;
4140                 }
4141                 i++;
4142         }
4143 }
4144
4145 /**
4146  * netdev_dma_event - event callback for the net_dma_client
4147  * @client: should always be net_dma_client
4148  * @chan: DMA channel for the event
4149  * @state: DMA state to be handled
4150  */
4151 static enum dma_state_client
4152 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4153         enum dma_state state)
4154 {
4155         int i, found = 0, pos = -1;
4156         struct net_dma *net_dma =
4157                 container_of(client, struct net_dma, client);
4158         enum dma_state_client ack = DMA_DUP; /* default: take no action */
4159
4160         spin_lock(&net_dma->lock);
4161         switch (state) {
4162         case DMA_RESOURCE_AVAILABLE:
4163                 for (i = 0; i < NR_CPUS; i++)
4164                         if (net_dma->channels[i] == chan) {
4165                                 found = 1;
4166                                 break;
4167                         } else if (net_dma->channels[i] == NULL && pos < 0)
4168                                 pos = i;
4169
4170                 if (!found && pos >= 0) {
4171                         ack = DMA_ACK;
4172                         net_dma->channels[pos] = chan;
4173                         cpu_set(pos, net_dma->channel_mask);
4174                         net_dma_rebalance(net_dma);
4175                 }
4176                 break;
4177         case DMA_RESOURCE_REMOVED:
4178                 for (i = 0; i < NR_CPUS; i++)
4179                         if (net_dma->channels[i] == chan) {
4180                                 found = 1;
4181                                 pos = i;
4182                                 break;
4183                         }
4184
4185                 if (found) {
4186                         ack = DMA_ACK;
4187                         cpu_clear(pos, net_dma->channel_mask);
4188                         net_dma->channels[i] = NULL;
4189                         net_dma_rebalance(net_dma);
4190                 }
4191                 break;
4192         default:
4193                 break;
4194         }
4195         spin_unlock(&net_dma->lock);
4196
4197         return ack;
4198 }
4199
/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 *
 * Initialises the net_dma lock, sets the DMA_MEMCPY capability in the
 * client's capability mask, registers the client and requests channels
 * for it.  Always returns 0.
 */
static int __init netdev_dma_register(void)
{
        spin_lock_init(&net_dma.lock);
        dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
        dma_async_client_register(&net_dma.client);
        dma_async_client_chan_request(&net_dma.client);
        return 0;
}
4211
4212 #else
/* Stub used when CONFIG_NET_DMA is not set: always reports -ENODEV. */
static int __init netdev_dma_register(void) { return -ENODEV; }
4214 #endif /* CONFIG_NET_DMA */
4215
4216 /**
4217  *      netdev_compute_feature - compute conjunction of two feature sets
4218  *      @all: first feature set
4219  *      @one: second feature set
4220  *
4221  *      Computes a new feature set after adding a device with feature set
4222  *      @one to the master device with current feature set @all.  Returns
4223  *      the new feature set.
4224  */
4225 int netdev_compute_features(unsigned long all, unsigned long one)
4226 {
4227         /* if device needs checksumming, downgrade to hw checksumming */
4228         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4229                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4230
4231         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4232         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4233                 all ^= NETIF_F_HW_CSUM
4234                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4235
4236         if (one & NETIF_F_GSO)
4237                 one |= NETIF_F_GSO_SOFTWARE;
4238         one |= NETIF_F_GSO;
4239
4240         /* If even one device supports robust GSO, enable it for all. */
4241         if (one & NETIF_F_GSO_ROBUST)
4242                 all |= NETIF_F_GSO_ROBUST;
4243
4244         all &= one | NETIF_F_LLTX;
4245
4246         if (!(all & NETIF_F_ALL_CSUM))
4247                 all &= ~NETIF_F_SG;
4248         if (!(all & NETIF_F_SG))
4249                 all &= ~NETIF_F_GSO_MASK;
4250
4251         return all;
4252 }
4253 EXPORT_SYMBOL(netdev_compute_features);
4254
4255 static struct hlist_head *netdev_create_hash(void)
4256 {
4257         int i;
4258         struct hlist_head *hash;
4259
4260         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4261         if (hash != NULL)
4262                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4263                         INIT_HLIST_HEAD(&hash[i]);
4264
4265         return hash;
4266 }
4267
4268 /* Initialize per network namespace state */
4269 static int netdev_init(struct net *net)
4270 {
4271         INIT_LIST_HEAD(&net->dev_base_head);
4272         rwlock_init(&dev_base_lock);
4273
4274         net->dev_name_head = netdev_create_hash();
4275         if (net->dev_name_head == NULL)
4276                 goto err_name;
4277
4278         net->dev_index_head = netdev_create_hash();
4279         if (net->dev_index_head == NULL)
4280                 goto err_idx;
4281
4282         return 0;
4283
4284 err_idx:
4285         kfree(net->dev_name_head);
4286 err_name:
4287         return -ENOMEM;
4288 }
4289
/*
 * Release per network namespace state: frees the name and index hash
 * tables that netdev_init() allocated for @net.
 */
static void netdev_exit(struct net *net)
{
        kfree(net->dev_name_head);
        kfree(net->dev_index_head);
}
4295
/*
 * Per-namespace operations that set up and tear down the device list
 * and hash tables; registered with register_pernet_subsys() from
 * net_dev_init().
 */
static struct pernet_operations netdev_net_ops = {
        .init = netdev_init,
        .exit = netdev_exit,
};
4300
4301 static void default_device_exit(struct net *net)
4302 {
4303         struct net_device *dev, *next;
4304         /*
4305          * Push all migratable of the network devices back to the
4306          * initial network namespace
4307          */
4308         rtnl_lock();
4309         for_each_netdev_safe(net, dev, next) {
4310                 int err;
4311
4312                 /* Ignore unmoveable devices (i.e. loopback) */
4313                 if (dev->features & NETIF_F_NETNS_LOCAL)
4314                         continue;
4315
4316                 /* Push remaing network devices to init_net */
4317                 err = dev_change_net_namespace(dev, &init_net, "dev%d");
4318                 if (err) {
4319                         printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4320                                 __func__, dev->name, err);
4321                         unregister_netdevice(dev);
4322                 }
4323         }
4324         rtnl_unlock();
4325 }
4326
4327 static struct pernet_operations default_device_ops = {
4328         .exit = default_device_exit,
4329 };
4330
4331 /*
4332  *      Initialize the DEV module. At boot time this walks the device list and
4333  *      unhooks any devices that fail to initialise (normally hardware not
4334  *      present) and leaves us with a valid list of present and active devices.
4335  *
4336  */
4337
4338 /*
4339  *       This is called single threaded during boot, so no need
4340  *       to take the rtnl semaphore.
4341  */
4342 static int __init net_dev_init(void)
4343 {
4344         int i, rc = -ENOMEM;
4345
4346         BUG_ON(!dev_boot_phase);
4347
4348         if (dev_proc_init())
4349                 goto out;
4350
4351         if (netdev_kobject_init())
4352                 goto out;
4353
4354         INIT_LIST_HEAD(&ptype_all);
4355         for (i = 0; i < 16; i++)
4356                 INIT_LIST_HEAD(&ptype_base[i]);
4357
4358         if (register_pernet_subsys(&netdev_net_ops))
4359                 goto out;
4360
4361         if (register_pernet_device(&default_device_ops))
4362                 goto out;
4363
4364         /*
4365          *      Initialise the packet receive queues.
4366          */
4367
4368         for_each_possible_cpu(i) {
4369                 struct softnet_data *queue;
4370
4371                 queue = &per_cpu(softnet_data, i);
4372                 skb_queue_head_init(&queue->input_pkt_queue);
4373                 queue->completion_queue = NULL;
4374                 INIT_LIST_HEAD(&queue->poll_list);
4375
4376                 queue->backlog.poll = process_backlog;
4377                 queue->backlog.weight = weight_p;
4378         }
4379
4380         netdev_dma_register();
4381
4382         dev_boot_phase = 0;
4383
4384         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4385         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4386
4387         hotcpu_notifier(dev_cpu_callback, 0);
4388         dst_init();
4389         dev_mcast_init();
4390         rc = 0;
4391 out:
4392         return rc;
4393 }
4394
4395 subsys_initcall(net_dev_init);
4396
/* Device lookup */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);

/* Packet type handlers */
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack);

/* Device names */
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_alloc_name);

/* Device state and configuration */
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);

/* Registration and notifiers */
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Transmit, receive and miscellaneous helpers */
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);

/* Hooks exported only when the bridge code is built in or as a module */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);