err.no Git - linux-2.6/blob - net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/sock.h>
  96 #include <linux/rtnetlink.h>
  97 #include <linux/proc_fs.h>
  98 #include <linux/seq_file.h>
  99 #include <linux/stat.h>
 100 #include <linux/if_bridge.h>
 101 #include <net/dst.h>
 102 #include <net/pkt_sched.h>
 103 #include <net/checksum.h>
 104 #include <linux/highmem.h>
 105 #include <linux/init.h>
 106 #include <linux/kmod.h>
 107 #include <linux/module.h>
 108 #include <linux/kallsyms.h>
 109 #include <linux/netpoll.h>
 110 #include <linux/rcupdate.h>
 111 #include <linux/delay.h>
 112 #include <linux/wireless.h>
 113 #include <net/iw_handler.h>
 114 #include <asm/current.h>
 115 #include <linux/audit.h>
 116 #include <linux/dmaengine.h>
 117 #include <linux/err.h>
 118 #include <linux/ctype.h>
 119
 120 /*
 121  *      The list of packet types we will receive (as opposed to discard)
 122  *      and the routines to invoke.
 123  *
 124  *      Why 16. Because with 16 the only overlap we get on a hash of the
 125  *      low nibble of the protocol value is RARP/SNAP/X.25.
 126  *
 127  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 128  *             sure which should go first, but I bet it won't make much
 129  *             difference if we are running VLANs.  The good news is that
 130  *             this protocol won't be in the list unless compiled in, so
 131  *             the average user (w/out VLANs) will not be adversely affected.
 132  *             --BLG
 133  *
 134  *              0800    IP
 135  *              8100    802.1Q VLAN
 136  *              0001    802.3
 137  *              0002    AX.25
 138  *              0004    802.2
 139  *              8035    RARP
 140  *              0005    SNAP
 141  *              0805    X.25
 142  *              0806    ARP
 143  *              8137    IPX
 144  *              0009    Localtalk
 145  *              86DD    IPv6
 146  */
 147
/* Taken (with BHs disabled) by dev_add_pack() and __dev_remove_pack() while
 * they modify the handler lists below. */
static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all;              /* Taps */

#ifdef CONFIG_NET_DMA
/* Only built when CONFIG_NET_DMA is set; the users of these three objects
 * are outside this part of the file. */
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif
 157
 158 /*
 159  * The @dev_base list is protected by @dev_base_lock and the rtnl
 160  * semaphore.
 161  *
 162  * Pure readers hold dev_base_lock for reading.
 163  *
 164  * Writers must hold the rtnl semaphore while they loop through the
 165  * dev_base list, and hold dev_base_lock for writing when they do the
 166  * actual updates.  This allows pure readers to access the list even
 167  * while a writer is preparing to update it.
 168  *
 169  * To put it another way, dev_base_lock is held for writing only to
 170  * protect against pure readers; the rtnl semaphore provides the
 171  * protection against other writers.
 172  *
 173  * See, for example usages, register_netdevice() and
 174  * unregister_netdevice(), which must be called with the rtnl
 175  * semaphore held.
 176  */
/* Head of the singly linked (via ->next) list of network devices. */
struct net_device *dev_base;
/* Initially points at the head pointer itself; presumably tracks the
 * list's tail link for appends -- confirm against register_netdevice(). */
static struct net_device **dev_tail = &dev_base;
/* Reader/writer lock for the device list and the lookup hashes. */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);
 183
/* Both lookup tables have 1 << NETDEV_HASHBITS (256) buckets. */
#define NETDEV_HASHBITS 8
/* Devices hashed by name; see dev_name_hash(). */
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
/* Devices hashed by ifindex; see dev_index_hash(). */
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 187
 188 static inline struct hlist_head *dev_name_hash(const char *name)
 189 {
 190         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 191         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 192 }
 193
 194 static inline struct hlist_head *dev_index_hash(int ifindex)
 195 {
 196         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 197 }
 198
/*
 *      Our notifier list.  Events such as NETDEV_UP, NETDEV_CHANGE and
 *      NETDEV_CHANGENAME are delivered on it with raw_notifier_call_chain().
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

/*
 *      sysfs glue.  With CONFIG_SYSFS the real functions are defined
 *      elsewhere; without it the calls collapse to stubs that evaluate
 *      to 0 (init/register) or do nothing (unregister).
 */
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()             (0)
#define netdev_register_sysfs(dev)      (0)
#define netdev_unregister_sysfs(dev)    do { } while(0)
#endif
 220
 221
 222 /*******************************************************************************
 223
 224                 Protocol management and registration routines
 225
 226 *******************************************************************************/
 227
/*
 *      For efficiency: number of ETH_P_ALL handlers currently registered.
 *      Incremented in dev_add_pack() and decremented in
 *      __dev_remove_pack(), both under ptype_lock.
 */

static int netdev_nit;
 233
 234 /*
 235  *      Add a protocol ID to the list. Now that the input handler is
 236  *      smarter we can dispense with all the messy stuff that used to be
 237  *      here.
 238  *
 239  *      BEWARE!!! Protocol handlers, mangling input packets,
 240  *      MUST BE last in hash buckets and checking protocol handlers
 241  *      MUST start from promiscuous ptype_all chain in net_bh.
 242  *      It is true now, do not change it.
 243  *      Explanation follows: if protocol handler, mangling packet, will
 244  *      be the first on list, it is not able to sense, that packet
 245  *      is cloned and should be copied-on-write, so that it will
 246  *      change it and subsequent readers will get broken packet.
 247  *                                                      --ANK (980803)
 248  */
 249
 250 /**
 251  *      dev_add_pack - add packet handler
 252  *      @pt: packet type declaration
 253  *
 254  *      Add a protocol handler to the networking stack. The passed &packet_type
 255  *      is linked into kernel lists and may not be freed until it has been
 256  *      removed from the kernel lists.
 257  *
 258  *      This call does not sleep therefore it can not
 259  *      guarantee all CPU's that are in middle of receiving packets
 260  *      will see the new packet type (until the next received packet).
 261  */
 262
 263 void dev_add_pack(struct packet_type *pt)
 264 {
 265         int hash;
 266
 267         spin_lock_bh(&ptype_lock);
 268         if (pt->type == htons(ETH_P_ALL)) {
 269                 netdev_nit++;
 270                 list_add_rcu(&pt->list, &ptype_all);
 271         } else {
 272                 hash = ntohs(pt->type) & 15;
 273                 list_add_rcu(&pt->list, &ptype_base[hash]);
 274         }
 275         spin_unlock_bh(&ptype_lock);
 276 }
 277
 278 /**
 279  *      __dev_remove_pack        - remove packet handler
 280  *      @pt: packet type declaration
 281  *
 282  *      Remove a protocol handler that was previously added to the kernel
 283  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 284  *      from the kernel lists and can be freed or reused once this function
 285  *      returns.
 286  *
 287  *      The packet type might still be in use by receivers
 288  *      and must not be freed until after all the CPU's have gone
 289  *      through a quiescent state.
 290  */
 291 void __dev_remove_pack(struct packet_type *pt)
 292 {
 293         struct list_head *head;
 294         struct packet_type *pt1;
 295
 296         spin_lock_bh(&ptype_lock);
 297
 298         if (pt->type == htons(ETH_P_ALL)) {
 299                 netdev_nit--;
 300                 head = &ptype_all;
 301         } else
 302                 head = &ptype_base[ntohs(pt->type) & 15];
 303
 304         list_for_each_entry(pt1, head, list) {
 305                 if (pt == pt1) {
 306                         list_del_rcu(&pt->list);
 307                         goto out;
 308                 }
 309         }
 310
 311         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 312 out:
 313         spin_unlock_bh(&ptype_lock);
 314 }
 315 /**
 316  *      dev_remove_pack  - remove packet handler
 317  *      @pt: packet type declaration
 318  *
 319  *      Remove a protocol handler that was previously added to the kernel
 320  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 321  *      from the kernel lists and can be freed or reused once this function
 322  *      returns.
 323  *
 324  *      This call sleeps to guarantee that no CPU is looking at the packet
 325  *      type after return.
 326  */
 327 void dev_remove_pack(struct packet_type *pt)
 328 {
 329         __dev_remove_pack(pt);
 330
 331         synchronize_net();
 332 }
 333
 334 /******************************************************************************
 335
 336                       Device Boot-time Settings Routines
 337
 338 *******************************************************************************/
 339
 340 /* Boot time configuration table */
 341 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 342
 343 /**
 344  *      netdev_boot_setup_add   - add new setup entry
 345  *      @name: name of the device
 346  *      @map: configured settings for the device
 347  *
 348  *      Adds new setup entry to the dev_boot_setup list.  The function
 349  *      returns 0 on error and 1 on success.  This is a generic routine to
 350  *      all netdevices.
 351  */
 352 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 353 {
 354         struct netdev_boot_setup *s;
 355         int i;
 356
 357         s = dev_boot_setup;
 358         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 359                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 360                         memset(s[i].name, 0, sizeof(s[i].name));
 361                         strcpy(s[i].name, name);
 362                         memcpy(&s[i].map, map, sizeof(s[i].map));
 363                         break;
 364                 }
 365         }
 366
 367         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 368 }
 369
 370 /**
 371  *      netdev_boot_setup_check - check boot time settings
 372  *      @dev: the netdevice
 373  *
 374  *      Check boot time settings for the device.
 375  *      The found settings are set for the device to be used
 376  *      later in the device probing.
 377  *      Returns 0 if no settings found, 1 if they are.
 378  */
 379 int netdev_boot_setup_check(struct net_device *dev)
 380 {
 381         struct netdev_boot_setup *s = dev_boot_setup;
 382         int i;
 383
 384         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 385                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 386                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 387                         dev->irq        = s[i].map.irq;
 388                         dev->base_addr  = s[i].map.base_addr;
 389                         dev->mem_start  = s[i].map.mem_start;
 390                         dev->mem_end    = s[i].map.mem_end;
 391                         return 1;
 392                 }
 393         }
 394         return 0;
 395 }
 396
 397
 398 /**
 399  *      netdev_boot_base        - get address from boot time settings
 400  *      @prefix: prefix for network device
 401  *      @unit: id for network device
 402  *
 403  *      Check boot time settings for the base address of device.
 404  *      The found settings are set for the device to be used
 405  *      later in the device probing.
 406  *      Returns 0 if no settings found.
 407  */
 408 unsigned long netdev_boot_base(const char *prefix, int unit)
 409 {
 410         const struct netdev_boot_setup *s = dev_boot_setup;
 411         char name[IFNAMSIZ];
 412         int i;
 413
 414         sprintf(name, "%s%d", prefix, unit);
 415
 416         /*
 417          * If device already registered then return base of 1
 418          * to indicate not to probe for this interface
 419          */
 420         if (__dev_get_by_name(name))
 421                 return 1;
 422
 423         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 424                 if (!strcmp(name, s[i].name))
 425                         return s[i].map.base_addr;
 426         return 0;
 427 }
 428
 429 /*
 430  * Saves at boot time configured settings for any netdevice.
 431  */
 432 int __init netdev_boot_setup(char *str)
 433 {
 434         int ints[5];
 435         struct ifmap map;
 436
 437         str = get_options(str, ARRAY_SIZE(ints), ints);
 438         if (!str || !*str)
 439                 return 0;
 440
 441         /* Save settings */
 442         memset(&map, 0, sizeof(map));
 443         if (ints[0] > 0)
 444                 map.irq = ints[1];
 445         if (ints[0] > 1)
 446                 map.base_addr = ints[2];
 447         if (ints[0] > 2)
 448                 map.mem_start = ints[3];
 449         if (ints[0] > 3)
 450                 map.mem_end = ints[4];
 451
 452         /* Add new entry to the list */
 453         return netdev_boot_setup_add(str, &map);
 454 }
 455
 456 __setup("netdev=", netdev_boot_setup);
 457
 458 /*******************************************************************************
 459
 460                             Device Interface Subroutines
 461
 462 *******************************************************************************/
 463
 464 /**
 465  *      __dev_get_by_name       - find a device by its name
 466  *      @name: name to find
 467  *
 468  *      Find an interface by name. Must be called under RTNL semaphore
 469  *      or @dev_base_lock. If the name is found a pointer to the device
 470  *      is returned. If the name is not found then %NULL is returned. The
 471  *      reference counters are not incremented so the caller must be
 472  *      careful with locks.
 473  */
 474
 475 struct net_device *__dev_get_by_name(const char *name)
 476 {
 477         struct hlist_node *p;
 478
 479         hlist_for_each(p, dev_name_hash(name)) {
 480                 struct net_device *dev
 481                         = hlist_entry(p, struct net_device, name_hlist);
 482                 if (!strncmp(dev->name, name, IFNAMSIZ))
 483                         return dev;
 484         }
 485         return NULL;
 486 }
 487
 488 /**
 489  *      dev_get_by_name         - find a device by its name
 490  *      @name: name to find
 491  *
 492  *      Find an interface by name. This can be called from any
 493  *      context and does its own locking. The returned handle has
 494  *      the usage count incremented and the caller must use dev_put() to
 495  *      release it when it is no longer needed. %NULL is returned if no
 496  *      matching device is found.
 497  */
 498
 499 struct net_device *dev_get_by_name(const char *name)
 500 {
 501         struct net_device *dev;
 502
 503         read_lock(&dev_base_lock);
 504         dev = __dev_get_by_name(name);
 505         if (dev)
 506                 dev_hold(dev);
 507         read_unlock(&dev_base_lock);
 508         return dev;
 509 }
 510
 511 /**
 512  *      __dev_get_by_index - find a device by its ifindex
 513  *      @ifindex: index of device
 514  *
 515  *      Search for an interface by index. Returns %NULL if the device
 516  *      is not found or a pointer to the device. The device has not
 517  *      had its reference counter increased so the caller must be careful
 518  *      about locking. The caller must hold either the RTNL semaphore
 519  *      or @dev_base_lock.
 520  */
 521
 522 struct net_device *__dev_get_by_index(int ifindex)
 523 {
 524         struct hlist_node *p;
 525
 526         hlist_for_each(p, dev_index_hash(ifindex)) {
 527                 struct net_device *dev
 528                         = hlist_entry(p, struct net_device, index_hlist);
 529                 if (dev->ifindex == ifindex)
 530                         return dev;
 531         }
 532         return NULL;
 533 }
 534
 535
 536 /**
 537  *      dev_get_by_index - find a device by its ifindex
 538  *      @ifindex: index of device
 539  *
 540  *      Search for an interface by index. Returns NULL if the device
 541  *      is not found or a pointer to the device. The device returned has
 542  *      had a reference added and the pointer is safe until the user calls
 543  *      dev_put to indicate they have finished with it.
 544  */
 545
 546 struct net_device *dev_get_by_index(int ifindex)
 547 {
 548         struct net_device *dev;
 549
 550         read_lock(&dev_base_lock);
 551         dev = __dev_get_by_index(ifindex);
 552         if (dev)
 553                 dev_hold(dev);
 554         read_unlock(&dev_base_lock);
 555         return dev;
 556 }
 557
 558 /**
 559  *      dev_getbyhwaddr - find a device by its hardware address
 560  *      @type: media type of device
 561  *      @ha: hardware address
 562  *
 563  *      Search for an interface by MAC address. Returns NULL if the device
 564  *      is not found or a pointer to the device. The caller must hold the
 565  *      rtnl semaphore. The returned device has not had its ref count increased
 566  *      and the caller must therefore be careful about locking
 567  *
 568  *      BUGS:
 569  *      If the API was consistent this would be __dev_get_by_hwaddr
 570  */
 571
 572 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 573 {
 574         struct net_device *dev;
 575
 576         ASSERT_RTNL();
 577
 578         for (dev = dev_base; dev; dev = dev->next)
 579                 if (dev->type == type &&
 580                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 581                         break;
 582         return dev;
 583 }
 584
 585 EXPORT_SYMBOL(dev_getbyhwaddr);
 586
 587 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 588 {
 589         struct net_device *dev;
 590
 591         rtnl_lock();
 592         for (dev = dev_base; dev; dev = dev->next) {
 593                 if (dev->type == type) {
 594                         dev_hold(dev);
 595                         break;
 596                 }
 597         }
 598         rtnl_unlock();
 599         return dev;
 600 }
 601
 602 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 603
 604 /**
 605  *      dev_get_by_flags - find any device with given flags
 606  *      @if_flags: IFF_* values
 607  *      @mask: bitmask of bits in if_flags to check
 608  *
 609  *      Search for any interface with the given flags. Returns NULL if a device
 610  *      is not found or a pointer to the device. The device returned has
 611  *      had a reference added and the pointer is safe until the user calls
 612  *      dev_put to indicate they have finished with it.
 613  */
 614
 615 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 616 {
 617         struct net_device *dev;
 618
 619         read_lock(&dev_base_lock);
 620         for (dev = dev_base; dev != NULL; dev = dev->next) {
 621                 if (((dev->flags ^ if_flags) & mask) == 0) {
 622                         dev_hold(dev);
 623                         break;
 624                 }
 625         }
 626         read_unlock(&dev_base_lock);
 627         return dev;
 628 }
 629
 630 /**
 631  *      dev_valid_name - check if name is okay for network device
 632  *      @name: name string
 633  *
 634  *      Network device names need to be valid file names to
 635  *      to allow sysfs to work.  We also disallow any kind of
 636  *      whitespace.
 637  */
 638 int dev_valid_name(const char *name)
 639 {
 640         if (*name == '\0')
 641                 return 0;
 642         if (strlen(name) >= IFNAMSIZ)
 643                 return 0;
 644         if (!strcmp(name, ".") || !strcmp(name, ".."))
 645                 return 0;
 646
 647         while (*name) {
 648                 if (*name == '/' || isspace(*name))
 649                         return 0;
 650                 name++;
 651         }
 652         return 1;
 653 }
 654
 655 /**
 656  *      dev_alloc_name - allocate a name for a device
 657  *      @dev: device
 658  *      @name: name format string
 659  *
 660  *      Passed a format string - eg "lt%d" it will try and find a suitable
 661  *      id. It scans list of devices to build up a free map, then chooses
 662  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 663  *      while allocating the name and adding the device in order to avoid
 664  *      duplicates.
 665  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 666  *      Returns the number of the unit assigned or a negative errno code.
 667  */
 668
 669 int dev_alloc_name(struct net_device *dev, const char *name)
 670 {
 671         int i = 0;
 672         char buf[IFNAMSIZ];
 673         const char *p;
 674         const int max_netdevices = 8*PAGE_SIZE;
 675         long *inuse;
 676         struct net_device *d;
 677
 678         p = strnchr(name, IFNAMSIZ-1, '%');
 679         if (p) {
 680                 /*
 681                  * Verify the string as this thing may have come from
 682                  * the user.  There must be either one "%d" and no other "%"
 683                  * characters.
 684                  */
 685                 if (p[1] != 'd' || strchr(p + 2, '%'))
 686                         return -EINVAL;
 687
 688                 /* Use one page as a bit array of possible slots */
 689                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 690                 if (!inuse)
 691                         return -ENOMEM;
 692
 693                 for (d = dev_base; d; d = d->next) {
 694                         if (!sscanf(d->name, name, &i))
 695                                 continue;
 696                         if (i < 0 || i >= max_netdevices)
 697                                 continue;
 698
 699                         /*  avoid cases where sscanf is not exact inverse of printf */
 700                         snprintf(buf, sizeof(buf), name, i);
 701                         if (!strncmp(buf, d->name, IFNAMSIZ))
 702                                 set_bit(i, inuse);
 703                 }
 704
 705                 i = find_first_zero_bit(inuse, max_netdevices);
 706                 free_page((unsigned long) inuse);
 707         }
 708
 709         snprintf(buf, sizeof(buf), name, i);
 710         if (!__dev_get_by_name(buf)) {
 711                 strlcpy(dev->name, buf, IFNAMSIZ);
 712                 return i;
 713         }
 714
 715         /* It is possible to run out of possible slots
 716          * when the name is long and there isn't enough space left
 717          * for the digits, or if all bits are used.
 718          */
 719         return -ENFILE;
 720 }
 721
 722
 723 /**
 724  *      dev_change_name - change name of a device
 725  *      @dev: device
 726  *      @newname: name (or format string) must be at least IFNAMSIZ
 727  *
 728  *      Change name of a device, can pass format strings "eth%d".
 729  *      for wildcarding.
 730  */
 731 int dev_change_name(struct net_device *dev, char *newname)
 732 {
 733         int err = 0;
 734
 735         ASSERT_RTNL();
 736
 737         if (dev->flags & IFF_UP)
 738                 return -EBUSY;
 739
 740         if (!dev_valid_name(newname))
 741                 return -EINVAL;
 742
 743         if (strchr(newname, '%')) {
 744                 err = dev_alloc_name(dev, newname);
 745                 if (err < 0)
 746                         return err;
 747                 strcpy(newname, dev->name);
 748         }
 749         else if (__dev_get_by_name(newname))
 750                 return -EEXIST;
 751         else
 752                 strlcpy(dev->name, newname, IFNAMSIZ);
 753
 754         device_rename(&dev->dev, dev->name);
 755         hlist_del(&dev->name_hlist);
 756         hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 757         raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 758
 759         return err;
 760 }
 761
 762 /**
 763  *      netdev_features_change - device changes features
 764  *      @dev: device to cause notification
 765  *
 766  *      Called to indicate a device has changed features.
 767  */
 768 void netdev_features_change(struct net_device *dev)
 769 {
 770         raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 771 }
 772 EXPORT_SYMBOL(netdev_features_change);
 773
 774 /**
 775  *      netdev_state_change - device changes state
 776  *      @dev: device to cause notification
 777  *
 778  *      Called to indicate a device has changed state. This function calls
 779  *      the notifier chains for netdev_chain and sends a NEWLINK message
 780  *      to the routing socket.
 781  */
 782 void netdev_state_change(struct net_device *dev)
 783 {
 784         if (dev->flags & IFF_UP) {
 785                 raw_notifier_call_chain(&netdev_chain,
 786                                 NETDEV_CHANGE, dev);
 787                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 788         }
 789 }
 790
 791 /**
 792  *      dev_load        - load a network module
 793  *      @name: name of interface
 794  *
 795  *      If a network interface is not present and the process has suitable
 796  *      privileges this function loads the module. If module loading is not
 797  *      available in this kernel then it becomes a nop.
 798  */
 799
 800 void dev_load(const char *name)
 801 {
 802         struct net_device *dev;
 803
 804         read_lock(&dev_base_lock);
 805         dev = __dev_get_by_name(name);
 806         read_unlock(&dev_base_lock);
 807
 808         if (!dev && capable(CAP_SYS_MODULE))
 809                 request_module("%s", name);
 810 }
 811
 812 static int default_rebuild_header(struct sk_buff *skb)
 813 {
 814         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 815                skb->dev ? skb->dev->name : "NULL!!!");
 816         kfree_skb(skb);
 817         return 1;
 818 }
 819
 820
 821 /**
 822  *      dev_open        - prepare an interface for use.
 823  *      @dev:   device to open
 824  *
 825  *      Takes a device from down to up state. The device's private open
 826  *      function is invoked and then the multicast lists are loaded. Finally
 827  *      the device is moved into the up state and a %NETDEV_UP message is
 828  *      sent to the netdev notifier chain.
 829  *
 830  *      Calling this function on an active interface is a nop. On a failure
 831  *      a negative errno code is returned.
 832  */
 833 int dev_open(struct net_device *dev)
 834 {
 835         int ret = 0;
 836
 837         /*
 838          *      Is it already up?
 839          */
 840
 841         if (dev->flags & IFF_UP)
 842                 return 0;
 843
 844         /*
 845          *      Is it even present?
 846          */
 847         if (!netif_device_present(dev))
 848                 return -ENODEV;
 849
 850         /*
 851          *      Call device private open method
 852          */
 853         set_bit(__LINK_STATE_START, &dev->state);
 854         if (dev->open) {
 855                 ret = dev->open(dev);
 856                 if (ret)
 857                         clear_bit(__LINK_STATE_START, &dev->state);
 858         }
 859
 860         /*
 861          *      If it went open OK then:
 862          */
 863
 864         if (!ret) {
 865                 /*
 866                  *      Set the flags.
 867                  */
 868                 dev->flags |= IFF_UP;
 869
 870                 /*
 871                  *      Initialize multicasting status
 872                  */
 873                 dev_mc_upload(dev);
 874
 875                 /*
 876                  *      Wakeup transmit queue engine
 877                  */
 878                 dev_activate(dev);
 879
 880                 /*
 881                  *      ... and announce new interface.
 882                  */
 883                 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 884         }
 885         return ret;
 886 }
 887
 888 /**
 889  *      dev_close - shutdown an interface.
 890  *      @dev: device to shutdown
 891  *
 892  *      This function moves an active device into down state. A
 893  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 894  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 895  *      chain.
 896  */
 897 int dev_close(struct net_device *dev)
 898 {
 899         if (!(dev->flags & IFF_UP))
 900                 return 0;
 901
 902         /*
 903          *      Tell people we are going down, so that they can
 904          *      prepare to death, when device is still operating.
 905          */
 906         raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 907
 908         dev_deactivate(dev);
 909
 910         clear_bit(__LINK_STATE_START, &dev->state);
 911
 912         /* Synchronize to scheduled poll. We cannot touch poll list,
 913          * it can be even on different cpu. So just clear netif_running(),
 914          * and wait when poll really will happen. Actually, the best place
 915          * for this is inside dev->stop() after device stopped its irq
 916          * engine, but this requires more changes in devices. */
 917
 918         smp_mb__after_clear_bit(); /* Commit netif_running(). */
 919         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 920                 /* No hurry. */
 921                 msleep(1);
 922         }
 923
 924         /*
 925          *      Call the device specific close. This cannot fail.
 926          *      Only if device is UP
 927          *
 928          *      We allow it to be called even after a DETACH hot-plug
 929          *      event.
 930          */
 931         if (dev->stop)
 932                 dev->stop(dev);
 933
 934         /*
 935          *      Device is now down.
 936          */
 937
 938         dev->flags &= ~IFF_UP;
 939
 940         /*
 941          * Tell people we are down
 942          */
 943         raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 944
 945         return 0;
 946 }
 947
 948
 949 /*
 950  *      Device change register/unregister. These are not inline or static
 951  *      as we export them to the world.
 952  */
 953
 954 /**
 955  *      register_netdevice_notifier - register a network notifier block
 956  *      @nb: notifier
 957  *
 958  *      Register a notifier to be called when network device events occur.
 959  *      The notifier passed is linked into the kernel structures and must
 960  *      not be reused until it has been unregistered. A negative errno code
 961  *      is returned on a failure.
 962  *
 963  *      When registered all registration and up events are replayed
 964  *      to the new notifier to allow device to have a race free
 965  *      view of the network device list.
 966  */
 967
 968 int register_netdevice_notifier(struct notifier_block *nb)
 969 {
 970         struct net_device *dev;
 971         int err;
 972
 973         rtnl_lock();
 974         err = raw_notifier_chain_register(&netdev_chain, nb);
 975         if (!err) {
 976                 for (dev = dev_base; dev; dev = dev->next) {
 977                         nb->notifier_call(nb, NETDEV_REGISTER, dev);
 978
 979                         if (dev->flags & IFF_UP)
 980                                 nb->notifier_call(nb, NETDEV_UP, dev);
 981                 }
 982         }
 983         rtnl_unlock();
 984         return err;
 985 }
 986
 987 /**
 988  *      unregister_netdevice_notifier - unregister a network notifier block
 989  *      @nb: notifier
 990  *
 991  *      Unregister a notifier previously registered by
 992  *      register_netdevice_notifier(). The notifier is unlinked into the
 993  *      kernel structures and may then be reused. A negative errno code
 994  *      is returned on a failure.
 995  */
 996
 997 int unregister_netdevice_notifier(struct notifier_block *nb)
 998 {
 999         int err;
1000
1001         rtnl_lock();
1002         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1003         rtnl_unlock();
1004         return err;
1005 }
1006
1007 /**
1008  *      call_netdevice_notifiers - call all network notifier blocks
1009  *      @val: value passed unmodified to notifier function
1010  *      @v:   pointer passed unmodified to notifier function
1011  *
1012  *      Call all network notifier blocks.  Parameters and return value
1013  *      are as for raw_notifier_call_chain().
1014  */
1015
1016 int call_netdevice_notifiers(unsigned long val, void *v)
1017 {
1018         return raw_notifier_call_chain(&netdev_chain, val, v);
1019 }
1020
1021 /* When > 0 there are consumers of rx skb time stamps */
1022 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1023
1024 void net_enable_timestamp(void)
1025 {
1026         atomic_inc(&netstamp_needed);
1027 }
1028
1029 void net_disable_timestamp(void)
1030 {
1031         atomic_dec(&netstamp_needed);
1032 }
1033
1034 static inline void net_timestamp(struct sk_buff *skb)
1035 {
1036         if (atomic_read(&netstamp_needed))
1037                 __net_timestamp(skb);
1038         else
1039                 skb->tstamp.tv64 = 0;
1040 }
1041
1042 /*
1043  *      Support routine. Sends outgoing frames to any network
1044  *      taps currently in use.
1045  */
1046
1047 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1048 {
1049         struct packet_type *ptype;
1050
1051         net_timestamp(skb);
1052
1053         rcu_read_lock();
1054         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1055                 /* Never send packets back to the socket
1056                  * they originated from - MvS (miquels@drinkel.ow.org)
1057                  */
1058                 if ((ptype->dev == dev || !ptype->dev) &&
1059                     (ptype->af_packet_priv == NULL ||
1060                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1061                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1062                         if (!skb2)
1063                                 break;
1064
1065                         /* skb->nh should be correctly
1066                            set by sender, so that the second statement is
1067                            just protection against buggy protocols.
1068                          */
1069                         skb_reset_mac_header(skb2);
1070
1071                         if (skb2->nh.raw < skb2->data ||
1072                             skb2->nh.raw > skb2->tail) {
1073                                 if (net_ratelimit())
1074                                         printk(KERN_CRIT "protocol %04x is "
1075                                                "buggy, dev %s\n",
1076                                                skb2->protocol, dev->name);
1077                                 skb_reset_network_header(skb2);
1078                         }
1079
1080                         skb2->h.raw = skb2->nh.raw;
1081                         skb2->pkt_type = PACKET_OUTGOING;
1082                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1083                 }
1084         }
1085         rcu_read_unlock();
1086 }
1087
1088
1089 void __netif_schedule(struct net_device *dev)
1090 {
1091         if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1092                 unsigned long flags;
1093                 struct softnet_data *sd;
1094
1095                 local_irq_save(flags);
1096                 sd = &__get_cpu_var(softnet_data);
1097                 dev->next_sched = sd->output_queue;
1098                 sd->output_queue = dev;
1099                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1100                 local_irq_restore(flags);
1101         }
1102 }
1103 EXPORT_SYMBOL(__netif_schedule);
1104
1105 void __netif_rx_schedule(struct net_device *dev)
1106 {
1107         unsigned long flags;
1108
1109         local_irq_save(flags);
1110         dev_hold(dev);
1111         list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1112         if (dev->quota < 0)
1113                 dev->quota += dev->weight;
1114         else
1115                 dev->quota = dev->weight;
1116         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1117         local_irq_restore(flags);
1118 }
1119 EXPORT_SYMBOL(__netif_rx_schedule);
1120
/*
 * Free @skb with the variant that fits the calling context: the irq
 * variant in hard irq context or with interrupts disabled, the plain
 * one otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (!in_irq() && !irqs_disabled()) {
                dev_kfree_skb(skb);
                return;
        }

        dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1129
1130
1131 /* Hot-plugging. */
1132 void netif_device_detach(struct net_device *dev)
1133 {
1134         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1135             netif_running(dev)) {
1136                 netif_stop_queue(dev);
1137         }
1138 }
1139 EXPORT_SYMBOL(netif_device_detach);
1140
1141 void netif_device_attach(struct net_device *dev)
1142 {
1143         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1144             netif_running(dev)) {
1145                 netif_wake_queue(dev);
1146                 __netdev_watchdog_up(dev);
1147         }
1148 }
1149 EXPORT_SYMBOL(netif_device_attach);
1150
1151
1152 /*
1153  * Invalidate hardware checksum when packet is to be mangled, and
1154  * complete checksum manually on outgoing path.
1155  */
1156 int skb_checksum_help(struct sk_buff *skb)
1157 {
1158         __wsum csum;
1159         int ret = 0, offset = skb->h.raw - skb->data;
1160
1161         if (skb->ip_summed == CHECKSUM_COMPLETE)
1162                 goto out_set_summed;
1163
1164         if (unlikely(skb_shinfo(skb)->gso_size)) {
1165                 /* Let GSO fix up the checksum. */
1166                 goto out_set_summed;
1167         }
1168
1169         if (skb_cloned(skb)) {
1170                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1171                 if (ret)
1172                         goto out;
1173         }
1174
1175         BUG_ON(offset > (int)skb->len);
1176         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1177
1178         offset = skb->tail - skb->h.raw;
1179         BUG_ON(offset <= 0);
1180         BUG_ON(skb->csum_offset + 2 > offset);
1181
1182         *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
1183
1184 out_set_summed:
1185         skb->ip_summed = CHECKSUM_NONE;
1186 out:
1187         return ret;
1188 }
1189
1190 /**
1191  *      skb_gso_segment - Perform segmentation on skb.
1192  *      @skb: buffer to segment
1193  *      @features: features for the output path (see dev->features)
1194  *
1195  *      This function segments the given skb and returns a list of segments.
1196  *
1197  *      It may return NULL if the skb requires no segmentation.  This is
1198  *      only possible when GSO is used for verifying header integrity.
1199  */
1200 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1201 {
1202         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1203         struct packet_type *ptype;
1204         __be16 type = skb->protocol;
1205         int err;
1206
1207         BUG_ON(skb_shinfo(skb)->frag_list);
1208
1209         skb_reset_mac_header(skb);
1210         skb->mac_len = skb->nh.raw - skb->data;
1211         __skb_pull(skb, skb->mac_len);
1212
1213         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1214                 if (skb_header_cloned(skb) &&
1215                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1216                         return ERR_PTR(err);
1217         }
1218
1219         rcu_read_lock();
1220         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1221                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1222                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1223                                 err = ptype->gso_send_check(skb);
1224                                 segs = ERR_PTR(err);
1225                                 if (err || skb_gso_ok(skb, features))
1226                                         break;
1227                                 __skb_push(skb, skb->data - skb->nh.raw);
1228                         }
1229                         segs = ptype->gso_segment(skb, features);
1230                         break;
1231                 }
1232         }
1233         rcu_read_unlock();
1234
1235         __skb_push(skb, skb->data - skb_mac_header(skb));
1236
1237         return segs;
1238 }
1239
1240 EXPORT_SYMBOL(skb_gso_segment);
1241
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate limited error naming the device and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (!net_ratelimit())
                return;

        printk(KERN_ERR "%s: hw csum failure.\n",
                dev ? dev->name : "<unknown>");
        dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1254
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise. Without CONFIG_HIGHMEM
 * the answer is always 0.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        if (!(dev->features & NETIF_F_HIGHDMA)) {
                int idx = 0;

                while (idx < skb_shinfo(skb)->nr_frags) {
                        if (PageHighMem(skb_shinfo(skb)->frags[idx].page))
                                return 1;
                        idx++;
                }
        }
#endif
        return 0;
}
1275
/*
 * State kept in skb->cb of an skb that carries a list of GSO segments:
 * the destructor the skb had before dev_gso_skb_destructor was
 * installed in its place.
 */
struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1281
1282 static void dev_gso_skb_destructor(struct sk_buff *skb)
1283 {
1284         struct dev_gso_cb *cb;
1285
1286         do {
1287                 struct sk_buff *nskb = skb->next;
1288
1289                 skb->next = nskb->next;
1290                 nskb->next = NULL;
1291                 kfree_skb(nskb);
1292         } while (skb->next);
1293
1294         cb = DEV_GSO_CB(skb);
1295         if (cb->destructor)
1296                 cb->destructor(skb);
1297 }
1298
1299 /**
1300  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1301  *      @skb: buffer to segment
1302  *
1303  *      This function segments the given skb and stores the list of segments
1304  *      in skb->next.
1305  */
1306 static int dev_gso_segment(struct sk_buff *skb)
1307 {
1308         struct net_device *dev = skb->dev;
1309         struct sk_buff *segs;
1310         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1311                                          NETIF_F_SG : 0);
1312
1313         segs = skb_gso_segment(skb, features);
1314
1315         /* Verifying header integrity only. */
1316         if (!segs)
1317                 return 0;
1318
1319         if (unlikely(IS_ERR(segs)))
1320                 return PTR_ERR(segs);
1321
1322         skb->next = segs;
1323         DEV_GSO_CB(skb)->destructor = skb->destructor;
1324         skb->destructor = dev_gso_skb_destructor;
1325
1326         return 0;
1327 }
1328
1329 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1330 {
1331         if (likely(!skb->next)) {
1332                 if (netdev_nit)
1333                         dev_queue_xmit_nit(skb, dev);
1334
1335                 if (netif_needs_gso(dev, skb)) {
1336                         if (unlikely(dev_gso_segment(skb)))
1337                                 goto out_kfree_skb;
1338                         if (skb->next)
1339                                 goto gso;
1340                 }
1341
1342                 return dev->hard_start_xmit(skb, dev);
1343         }
1344
1345 gso:
1346         do {
1347                 struct sk_buff *nskb = skb->next;
1348                 int rc;
1349
1350                 skb->next = nskb->next;
1351                 nskb->next = NULL;
1352                 rc = dev->hard_start_xmit(nskb, dev);
1353                 if (unlikely(rc)) {
1354                         nskb->next = skb->next;
1355                         skb->next = nskb;
1356                         return rc;
1357                 }
1358                 if (unlikely(netif_queue_stopped(dev) && skb->next))
1359                         return NETDEV_TX_BUSY;
1360         } while (skb->next);
1361
1362         skb->destructor = DEV_GSO_CB(skb)->destructor;
1363
1364 out_kfree_skb:
1365         kfree_skb(skb);
1366         return 0;
1367 }
1368
/*
 * Take / release the device transmit lock unless the driver declared
 * lockless transmit (NETIF_F_LLTX).
 *
 * The bodies are wrapped in do { } while (0) so that "HARD_TX_LOCK(...);"
 * is exactly one statement and stays correct inside an unbraced
 * if/else. The @dev argument is parenthesized so that any expression
 * can be passed. @cpu is accepted for the callers' sake but not used.
 */
#define HARD_TX_LOCK(dev, cpu) do {                             \
        if (((dev)->features & NETIF_F_LLTX) == 0) {            \
                netif_tx_lock(dev);                             \
        }                                                       \
} while (0)

#define HARD_TX_UNLOCK(dev) do {                                \
        if (((dev)->features & NETIF_F_LLTX) == 0) {            \
                netif_tx_unlock(dev);                           \
        }                                                       \
} while (0)
1380
1381 /**
1382  *      dev_queue_xmit - transmit a buffer
1383  *      @skb: buffer to transmit
1384  *
1385  *      Queue a buffer for transmission to a network device. The caller must
1386  *      have set the device and priority and built the buffer before calling
1387  *      this function. The function can be called from an interrupt.
1388  *
1389  *      A negative errno code is returned on a failure. A success does not
1390  *      guarantee the frame will be transmitted as it may be dropped due
1391  *      to congestion or traffic shaping.
1392  *
1393  * -----------------------------------------------------------------------------------
1394  *      I notice this method can also return errors from the queue disciplines,
1395  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1396  *      be positive.
1397  *
1398  *      Regardless of the return value, the skb is consumed, so it is currently
1399  *      difficult to retry a send to this method.  (You can bump the ref count
1400  *      before sending to hold a reference for retry if you are careful.)
1401  *
1402  *      When calling this method, interrupts MUST be enabled.  This is because
1403  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1404  *          --BLG
1405  */
1406
1407 int dev_queue_xmit(struct sk_buff *skb)
1408 {
1409         struct net_device *dev = skb->dev;
1410         struct Qdisc *q;
1411         int rc = -ENOMEM;
1412
1413         /* GSO will handle the following emulations directly. */
1414         if (netif_needs_gso(dev, skb))
1415                 goto gso;
1416
1417         if (skb_shinfo(skb)->frag_list &&
1418             !(dev->features & NETIF_F_FRAGLIST) &&
1419             __skb_linearize(skb))
1420                 goto out_kfree_skb;
1421
1422         /* Fragmented skb is linearized if device does not support SG,
1423          * or if at least one of fragments is in highmem and device
1424          * does not support DMA from it.
1425          */
1426         if (skb_shinfo(skb)->nr_frags &&
1427             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1428             __skb_linearize(skb))
1429                 goto out_kfree_skb;
1430
1431         /* If packet is not checksummed and device does not support
1432          * checksumming for this protocol, complete checksumming here.
1433          */
1434         if (skb->ip_summed == CHECKSUM_PARTIAL &&
1435             (!(dev->features & NETIF_F_GEN_CSUM) &&
1436              (!(dev->features & NETIF_F_IP_CSUM) ||
1437               skb->protocol != htons(ETH_P_IP))))
1438                 if (skb_checksum_help(skb))
1439                         goto out_kfree_skb;
1440
1441 gso:
1442         spin_lock_prefetch(&dev->queue_lock);
1443
1444         /* Disable soft irqs for various locks below. Also
1445          * stops preemption for RCU.
1446          */
1447         rcu_read_lock_bh();
1448
1449         /* Updates of qdisc are serialized by queue_lock.
1450          * The struct Qdisc which is pointed to by qdisc is now a
1451          * rcu structure - it may be accessed without acquiring
1452          * a lock (but the structure may be stale.) The freeing of the
1453          * qdisc will be deferred until it's known that there are no
1454          * more references to it.
1455          *
1456          * If the qdisc has an enqueue function, we still need to
1457          * hold the queue_lock before calling it, since queue_lock
1458          * also serializes access to the device queue.
1459          */
1460
1461         q = rcu_dereference(dev->qdisc);
1462 #ifdef CONFIG_NET_CLS_ACT
1463         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1464 #endif
1465         if (q->enqueue) {
1466                 /* Grab device queue */
1467                 spin_lock(&dev->queue_lock);
1468                 q = dev->qdisc;
1469                 if (q->enqueue) {
1470                         rc = q->enqueue(skb, q);
1471                         qdisc_run(dev);
1472                         spin_unlock(&dev->queue_lock);
1473
1474                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1475                         goto out;
1476                 }
1477                 spin_unlock(&dev->queue_lock);
1478         }
1479
1480         /* The device has no queue. Common case for software devices:
1481            loopback, all the sorts of tunnels...
1482
1483            Really, it is unlikely that netif_tx_lock protection is necessary
1484            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1485            counters.)
1486            However, it is possible, that they rely on protection
1487            made by us here.
1488
1489            Check this and shot the lock. It is not prone from deadlocks.
1490            Either shot noqueue qdisc, it is even simpler 8)
1491          */
1492         if (dev->flags & IFF_UP) {
1493                 int cpu = smp_processor_id(); /* ok because BHs are off */
1494
1495                 if (dev->xmit_lock_owner != cpu) {
1496
1497                         HARD_TX_LOCK(dev, cpu);
1498
1499                         if (!netif_queue_stopped(dev)) {
1500                                 rc = 0;
1501                                 if (!dev_hard_start_xmit(skb, dev)) {
1502                                         HARD_TX_UNLOCK(dev);
1503                                         goto out;
1504                                 }
1505                         }
1506                         HARD_TX_UNLOCK(dev);
1507                         if (net_ratelimit())
1508                                 printk(KERN_CRIT "Virtual device %s asks to "
1509                                        "queue packet!\n", dev->name);
1510                 } else {
1511                         /* Recursion is detected! It is possible,
1512                          * unfortunately */
1513                         if (net_ratelimit())
1514                                 printk(KERN_CRIT "Dead loop on virtual device "
1515                                        "%s, fix it urgently!\n", dev->name);
1516                 }
1517         }
1518
1519         rc = -ENETDOWN;
1520         rcu_read_unlock_bh();
1521
1522 out_kfree_skb:
1523         kfree_skb(skb);
1524         return rc;
1525 out:
1526         rcu_read_unlock_bh();
1527         return rc;
1528 }
1529
1530
1531 /*=======================================================================
1532                         Receiver routines
1533   =======================================================================*/
1534
1535 int netdev_max_backlog = 1000;
1536 int netdev_budget = 300;
1537 int weight_p = 64;            /* old backlog weight */
1538
1539 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1540
1541
1542 /**
1543  *      netif_rx        -       post buffer to the network code
1544  *      @skb: buffer to post
1545  *
1546  *      This function receives a packet from a device driver and queues it for
1547  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1548  *      may be dropped during processing for congestion control or by the
1549  *      protocol layers.
1550  *
1551  *      return values:
1552  *      NET_RX_SUCCESS  (no congestion)
1553  *      NET_RX_CN_LOW   (low congestion)
1554  *      NET_RX_CN_MOD   (moderate congestion)
1555  *      NET_RX_CN_HIGH  (high congestion)
1556  *      NET_RX_DROP     (packet was dropped)
1557  *
1558  */
1559
1560 int netif_rx(struct sk_buff *skb)
1561 {
1562         struct softnet_data *queue;
1563         unsigned long flags;
1564
1565         /* if netpoll wants it, pretend we never saw it */
1566         if (netpoll_rx(skb))
1567                 return NET_RX_DROP;
1568
1569         if (!skb->tstamp.tv64)
1570                 net_timestamp(skb);
1571
1572         /*
1573          * The code is rearranged so that the path is the most
1574          * short when CPU is congested, but is still operating.
1575          */
1576         local_irq_save(flags);
1577         queue = &__get_cpu_var(softnet_data);
1578
1579         __get_cpu_var(netdev_rx_stat).total++;
1580         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1581                 if (queue->input_pkt_queue.qlen) {
1582 enqueue:
1583                         dev_hold(skb->dev);
1584                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1585                         local_irq_restore(flags);
1586                         return NET_RX_SUCCESS;
1587                 }
1588
1589                 netif_rx_schedule(&queue->backlog_dev);
1590                 goto enqueue;
1591         }
1592
1593         __get_cpu_var(netdev_rx_stat).dropped++;
1594         local_irq_restore(flags);
1595
1596         kfree_skb(skb);
1597         return NET_RX_DROP;
1598 }
1599
/*
 * Wrapper around netif_rx() that, with preemption disabled, runs any
 * pending softirqs right after queueing. Returns what netif_rx()
 * returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
        int ret;

        preempt_disable();

        ret = netif_rx(skb);
        if (local_softirq_pending())
                do_softirq();

        preempt_enable();

        return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1614
1615 static inline struct net_device *skb_bond(struct sk_buff *skb)
1616 {
1617         struct net_device *dev = skb->dev;
1618
1619         if (dev->master) {
1620                 if (skb_bond_should_drop(skb)) {
1621                         kfree_skb(skb);
1622                         return NULL;
1623                 }
1624                 skb->dev = dev->master;
1625         }
1626
1627         return dev;
1628 }
1629
1630 static void net_tx_action(struct softirq_action *h)
1631 {
1632         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1633
1634         if (sd->completion_queue) {
1635                 struct sk_buff *clist;
1636
1637                 local_irq_disable();
1638                 clist = sd->completion_queue;
1639                 sd->completion_queue = NULL;
1640                 local_irq_enable();
1641
1642                 while (clist) {
1643                         struct sk_buff *skb = clist;
1644                         clist = clist->next;
1645
1646                         BUG_TRAP(!atomic_read(&skb->users));
1647                         __kfree_skb(skb);
1648                 }
1649         }
1650
1651         if (sd->output_queue) {
1652                 struct net_device *head;
1653
1654                 local_irq_disable();
1655                 head = sd->output_queue;
1656                 sd->output_queue = NULL;
1657                 local_irq_enable();
1658
1659                 while (head) {
1660                         struct net_device *dev = head;
1661                         head = head->next_sched;
1662
1663                         smp_mb__before_clear_bit();
1664                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1665
1666                         if (spin_trylock(&dev->queue_lock)) {
1667                                 qdisc_run(dev);
1668                                 spin_unlock(&dev->queue_lock);
1669                         } else {
1670                                 netif_schedule(dev);
1671                         }
1672                 }
1673         }
1674 }
1675
1676 static inline int deliver_skb(struct sk_buff *skb,
1677                               struct packet_type *pt_prev,
1678                               struct net_device *orig_dev)
1679 {
1680         atomic_inc(&skb->users);
1681         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1682 }
1683
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hooks for the bridge code; only declared here, not assigned. */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
                                                unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Give a received frame to the bridge when its device is a bridge port.
 *
 * Returns 0 without doing anything for loopback packets and for
 * devices that are not bridge ports. Otherwise a pending handler in
 * *pt_prev is served first (its result is stored in *ret and *pt_prev
 * is cleared) and the return value of br_handle_frame_hook() is
 * returned.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
                                    struct packet_type **pt_prev, int *ret,
                                    struct net_device *orig_dev)
{
        struct net_bridge_port *port;

        if ((*pskb)->pkt_type == PACKET_LOOPBACK)
                return 0;

        port = rcu_dereference((*pskb)->dev->br_port);
        if (port == NULL)
                return 0;

        /* Deliver to the pending handler before the bridge takes over. */
        if (*pt_prev) {
                *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)      (0)
#endif
1711
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
 * instructions (a compare and 2 extra stores right now) when it is
 * not there but CONFIG_NET_CLS_ACT is.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 * Runs @skb through the ingress qdisc of its device, if there is one.
 * Returns TC_ACT_OK when the device has no ingress qdisc, TC_ACT_SHOT
 * when the redirect ttl stored in skb->tc_verd exceeds MAX_RED_LOOP,
 * and otherwise the result of the qdisc's enqueue method.
 */
static int ing_filter(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct Qdisc *q;
        int result = TC_ACT_OK;
        __u32 ttl;

        if (!dev->qdisc_ingress)
                return result;

        /* Too many redirects: assume a loop and drop the packet. */
        ttl = (__u32) G_TC_RTTL(skb->tc_verd);
        if (ttl > MAX_RED_LOOP) {
                printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
                        skb->iif, skb->dev->ifindex);
                return TC_ACT_SHOT;
        }
        ttl++;

        skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

        /* Read the ingress qdisc again, this time under the queue lock. */
        spin_lock(&dev->queue_lock);
        q = dev->qdisc_ingress;
        if (q != NULL)
                result = q->enqueue(skb, q);
        spin_unlock(&dev->queue_lock);

        return result;
}
#endif
1749
/*
 * netif_receive_skb - push one received skb through the receive path
 * @skb: buffer to process
 *
 * Steps visible here: netpoll check, timestamp and iif fix-up, skb_bond(),
 * header pointer reset, and then, under rcu_read_lock(): delivery to the
 * ptype_all handlers, the ingress filter (CONFIG_NET_CLS_ACT only), the
 * bridge hook, and finally the ptype_base[] handlers whose type matches
 * skb->protocol.
 *
 * Returns the result of the last matching protocol handler.  NET_RX_DROP
 * is returned when netpoll takes the packet, when skb_bond() returns NULL,
 * or when no protocol handler matched (the skb is freed in that case).
 * NOTE(review): when ing_filter() reports TC_ACT_SHOT/TC_ACT_STOLEN, or
 * when handle_bridge() takes the packet, the value returned is whatever
 * 'ret' held at that point - confirm callers are fine with that.
 */
1750 int netif_receive_skb(struct sk_buff *skb)
1751 {
1752         struct packet_type *ptype, *pt_prev;
1753         struct net_device *orig_dev;
1754         int ret = NET_RX_DROP;
1755         __be16 type;
1756
1757         /* if we've gotten here through NAPI, check netpoll */
1758         if (skb->dev->poll && netpoll_rx(skb))
1759                 return NET_RX_DROP;
1760
        /* Stamp the packet only if it carries no timestamp yet. */
1761         if (!skb->tstamp.tv64)
1762                 net_timestamp(skb);
1763
        /* Record the input interface index unless already set. */
1764         if (!skb->iif)
1765                 skb->iif = skb->dev->ifindex;
1766
1767         orig_dev = skb_bond(skb);
1768
1769         if (!orig_dev)
1770                 return NET_RX_DROP;
1771
1772         __get_cpu_var(netdev_rx_stat).total++;
1773
        /* Network and transport header both start at skb->data for now. */
1774         skb_reset_network_header(skb);
1775         skb->h.raw = skb->data;
1776         skb->mac_len = skb->nh.raw - skb->mac.raw;
1777
        /*
         * pt_prev holds the handler whose delivery is still pending; each
         * loop below delivers the previous match before remembering the
         * new one, so the last match can get the skb without the extra
         * reference taken by deliver_skb().
         */
1778         pt_prev = NULL;
1779
1780         rcu_read_lock();
1781
1782 #ifdef CONFIG_NET_CLS_ACT
        /* TC_NCLS set: clear it and skip the taps and the ingress filter. */
1783         if (skb->tc_verd & TC_NCLS) {
1784                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1785                 goto ncls;
1786         }
1787 #endif
1788
        /* Handlers on ptype_all, optionally bound to this device. */
1789         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1790                 if (!ptype->dev || ptype->dev == skb->dev) {
1791                         if (pt_prev)
1792                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1793                         pt_prev = ptype;
1794                 }
1795         }
1796
1797 #ifdef CONFIG_NET_CLS_ACT
1798         if (pt_prev) {
1799                 ret = deliver_skb(skb, pt_prev, orig_dev);
1800                 pt_prev = NULL; /* no one else should process this afterwards */
1801         } else {
1802                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1803         }
1804
1805         ret = ing_filter(skb);
1806
        /* Shot or stolen: free the skb here and stop processing. */
1807         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1808                 kfree_skb(skb);
1809                 goto out;
1810         }
1811
1812         skb->tc_verd = 0;
1813 ncls:
1814 #endif
1815
        /* A non-zero return means the bridge hook took the packet. */
1816         if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1817                 goto out;
1818
        /* Protocol handlers, bucketed by the low 4 bits of the host-order type. */
1819         type = skb->protocol;
1820         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1821                 if (ptype->type == type &&
1822                     (!ptype->dev || ptype->dev == skb->dev)) {
1823                         if (pt_prev)
1824                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1825                         pt_prev = ptype;
1826                 }
1827         }
1828
        /* Last pending handler is called directly, without deliver_skb(). */
1829         if (pt_prev) {
1830                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1831         } else {
1832                 kfree_skb(skb);
1833                 /* Jamal, now you will not able to escape explaining
1834                  * me how you were going to use this. :-)
1835                  */
1836                 ret = NET_RX_DROP;
1837         }
1838
1839 out:
1840         rcu_read_unlock();
1841         return ret;
1842 }
1843
1844 static int process_backlog(struct net_device *backlog_dev, int *budget)
1845 {
1846         int work = 0;
1847         int quota = min(backlog_dev->quota, *budget);
1848         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1849         unsigned long start_time = jiffies;
1850
1851         backlog_dev->weight = weight_p;
1852         for (;;) {
1853                 struct sk_buff *skb;
1854                 struct net_device *dev;
1855
1856                 local_irq_disable();
1857                 skb = __skb_dequeue(&queue->input_pkt_queue);
1858                 if (!skb)
1859                         goto job_done;
1860                 local_irq_enable();
1861
1862                 dev = skb->dev;
1863
1864                 netif_receive_skb(skb);
1865
1866                 dev_put(dev);
1867
1868                 work++;
1869
1870                 if (work >= quota || jiffies - start_time > 1)
1871                         break;
1872
1873         }
1874
1875         backlog_dev->quota -= work;
1876         *budget -= work;
1877         return -1;
1878
1879 job_done:
1880         backlog_dev->quota -= work;
1881         *budget -= work;
1882
1883         list_del(&backlog_dev->poll_list);
1884         smp_mb__before_clear_bit();
1885         netif_poll_enable(backlog_dev);
1886
1887         local_irq_enable();
1888         return 0;
1889 }
1890
/*
 * net_rx_action - softirq handler that polls the devices on this CPU's
 * poll_list.
 *
 * Devices are polled from the head of the list until the list is empty,
 * the budget (initialised from netdev_budget) is used up, or more than one
 * jiffy has passed.  In the latter two cases time_squeeze is counted and
 * NET_RX_SOFTIRQ is raised again before leaving.
 */
1891 static void net_rx_action(struct softirq_action *h)
1892 {
1893         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1894         unsigned long start_time = jiffies;
1895         int budget = netdev_budget;
1896         void *have;
1897
        /* The poll list is examined and modified with interrupts disabled. */
1898         local_irq_disable();
1899
1900         while (!list_empty(&queue->poll_list)) {
1901                 struct net_device *dev;
1902
1903                 if (budget <= 0 || jiffies - start_time > 1)
1904                         goto softnet_break;
1905
                /* Interrupts are enabled again while the device is polled. */
1906                 local_irq_enable();
1907
1908                 dev = list_entry(queue->poll_list.next,
1909                                  struct net_device, poll_list);
1910                 have = netpoll_poll_lock(dev);
1911
                /*
                 * Quota exhausted, or poll() returned non-zero: move the
                 * device to the tail of the list and top up its quota by
                 * its weight.  Otherwise (poll() returned 0) drop the
                 * device reference.
                 */
1912                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1913                         netpoll_poll_unlock(have);
1914                         local_irq_disable();
1915                         list_move_tail(&dev->poll_list, &queue->poll_list);
1916                         if (dev->quota < 0)
1917                                 dev->quota += dev->weight;
1918                         else
1919                                 dev->quota = dev->weight;
1920                 } else {
1921                         netpoll_poll_unlock(have);
1922                         dev_put(dev);
1923                         local_irq_disable();
1924                 }
1925         }
1926 out:
1927 #ifdef CONFIG_NET_DMA
1928         /*
1929          * There may not be any more sk_buffs coming right now, so push
1930          * any pending DMA copies to hardware
1931          */
1932         if (net_dma_client) {
1933                 struct dma_chan *chan;
1934                 rcu_read_lock();
1935                 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
1936                         dma_async_memcpy_issue_pending(chan);
1937                 rcu_read_unlock();
1938         }
1939 #endif
1940         local_irq_enable();
1941         return;
1942
        /* Out of budget or time: count the squeeze and reschedule ourselves. */
1943 softnet_break:
1944         __get_cpu_var(netdev_rx_stat).time_squeeze++;
1945         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1946         goto out;
1947 }
1948
1949 static gifconf_func_t * gifconf_list [NPROTO];
1950
1951 /**
1952  *      register_gifconf        -       register a SIOCGIF handler
1953  *      @family: Address family
1954  *      @gifconf: Function handler
1955  *
1956  *      Register protocol dependent address dumping routines. The handler
1957  *      that is passed must not be freed or reused until it has been replaced
1958  *      by another handler.
1959  */
1960 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1961 {
1962         if (family >= NPROTO)
1963                 return -EINVAL;
1964         gifconf_list[family] = gifconf;
1965         return 0;
1966 }
1967
1968
1969 /*
1970  *      Map an interface index to its name (SIOCGIFNAME)
1971  */
1972
1973 /*
1974  *      We need this ioctl for efficient implementation of the
1975  *      if_indextoname() function required by the IPv6 API.  Without
1976  *      it, we would have to search all the interfaces to find a
1977  *      match.  --pb
1978  */
1979
1980 static int dev_ifname(struct ifreq __user *arg)
1981 {
1982         struct net_device *dev;
1983         struct ifreq ifr;
1984
1985         /*
1986          *      Fetch the caller's info block.
1987          */
1988
1989         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1990                 return -EFAULT;
1991
1992         read_lock(&dev_base_lock);
1993         dev = __dev_get_by_index(ifr.ifr_ifindex);
1994         if (!dev) {
1995                 read_unlock(&dev_base_lock);
1996                 return -ENODEV;
1997         }
1998
1999         strcpy(ifr.ifr_name, dev->name);
2000         read_unlock(&dev_base_lock);
2001
2002         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2003                 return -EFAULT;
2004         return 0;
2005 }
2006
2007 /*
2008  *      Perform a SIOCGIFCONF call. This structure will change
2009  *      size eventually, and there is nothing I can do about it.
2010  *      Thus we will need a 'compatibility mode'.
2011  */
2012
2013 static int dev_ifconf(char __user *arg)
2014 {
2015         struct ifconf ifc;
2016         struct net_device *dev;
2017         char __user *pos;
2018         int len;
2019         int total;
2020         int i;
2021
2022         /*
2023          *      Fetch the caller's info block.
2024          */
2025
2026         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2027                 return -EFAULT;
2028
2029         pos = ifc.ifc_buf;
2030         len = ifc.ifc_len;
2031
2032         /*
2033          *      Loop over the interfaces, and write an info block for each.
2034          */
2035
2036         total = 0;
2037         for (dev = dev_base; dev; dev = dev->next) {
2038                 for (i = 0; i < NPROTO; i++) {
2039                         if (gifconf_list[i]) {
2040                                 int done;
2041                                 if (!pos)
2042                                         done = gifconf_list[i](dev, NULL, 0);
2043                                 else
2044                                         done = gifconf_list[i](dev, pos + total,
2045                                                                len - total);
2046                                 if (done < 0)
2047                                         return -EFAULT;
2048                                 total += done;
2049                         }
2050                 }
2051         }
2052
2053         /*
2054          *      All done.  Write the updated control block back to the caller.
2055          */
2056         ifc.ifc_len = total;
2057
2058         /*
2059          *      Both BSD and Solaris return 0 here, so we do too.
2060          */
2061         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2062 }
2063
2064 #ifdef CONFIG_PROC_FS
2065 /*
2066  *      This is invoked by the /proc filesystem handler to display a device
2067  *      in detail.
2068  */
2069 static struct net_device *dev_get_idx(loff_t pos)
2070 {
2071         struct net_device *dev;
2072         loff_t i;
2073
2074         for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2075
2076         return i == pos ? dev : NULL;
2077 }
2078
2079 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2080 {
2081         read_lock(&dev_base_lock);
2082         return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2083 }
2084
2085 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2086 {
2087         ++*pos;
2088         return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2089 }
2090
2091 void dev_seq_stop(struct seq_file *seq, void *v)
2092 {
2093         read_unlock(&dev_base_lock);
2094 }
2095
2096 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2097 {
2098         if (dev->get_stats) {
2099                 struct net_device_stats *stats = dev->get_stats(dev);
2100
2101                 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2102                                 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2103                            dev->name, stats->rx_bytes, stats->rx_packets,
2104                            stats->rx_errors,
2105                            stats->rx_dropped + stats->rx_missed_errors,
2106                            stats->rx_fifo_errors,
2107                            stats->rx_length_errors + stats->rx_over_errors +
2108                              stats->rx_crc_errors + stats->rx_frame_errors,
2109                            stats->rx_compressed, stats->multicast,
2110                            stats->tx_bytes, stats->tx_packets,
2111                            stats->tx_errors, stats->tx_dropped,
2112                            stats->tx_fifo_errors, stats->collisions,
2113                            stats->tx_carrier_errors +
2114                              stats->tx_aborted_errors +
2115                              stats->tx_window_errors +
2116                              stats->tx_heartbeat_errors,
2117                            stats->tx_compressed);
2118         } else
2119                 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2120 }
2121
2122 /*
2123  *      Called from the PROCfs module. This now uses the new arbitrary sized
2124  *      /proc/net interface to create /proc/net/dev
2125  */
2126 static int dev_seq_show(struct seq_file *seq, void *v)
2127 {
2128         if (v == SEQ_START_TOKEN)
2129                 seq_puts(seq, "Inter-|   Receive                            "
2130                               "                    |  Transmit\n"
2131                               " face |bytes    packets errs drop fifo frame "
2132                               "compressed multicast|bytes    packets errs "
2133                               "drop fifo colls carrier compressed\n");
2134         else
2135                 dev_seq_printf_stats(seq, v);
2136         return 0;
2137 }
2138
2139 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2140 {
2141         struct netif_rx_stats *rc = NULL;
2142
2143         while (*pos < NR_CPUS)
2144                 if (cpu_online(*pos)) {
2145                         rc = &per_cpu(netdev_rx_stat, *pos);
2146                         break;
2147                 } else
2148                         ++*pos;
2149         return rc;
2150 }
2151
2152 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2153 {
2154         return softnet_get_online(pos);
2155 }
2156
2157 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2158 {
2159         ++*pos;
2160         return softnet_get_online(pos);
2161 }
2162
/* seq_file stop callback: nothing to do here, the iteration takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2166
2167 static int softnet_seq_show(struct seq_file *seq, void *v)
2168 {
2169         struct netif_rx_stats *s = v;
2170
2171         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2172                    s->total, s->dropped, s->time_squeeze, 0,
2173                    0, 0, 0, 0, /* was fastroute */
2174                    s->cpu_collision );
2175         return 0;
2176 }
2177
2178 static struct seq_operations dev_seq_ops = {
2179         .start = dev_seq_start,
2180         .next  = dev_seq_next,
2181         .stop  = dev_seq_stop,
2182         .show  = dev_seq_show,
2183 };
2184
2185 static int dev_seq_open(struct inode *inode, struct file *file)
2186 {
2187         return seq_open(file, &dev_seq_ops);
2188 }
2189
2190 static const struct file_operations dev_seq_fops = {
2191         .owner   = THIS_MODULE,
2192         .open    = dev_seq_open,
2193         .read    = seq_read,
2194         .llseek  = seq_lseek,
2195         .release = seq_release,
2196 };
2197
2198 static struct seq_operations softnet_seq_ops = {
2199         .start = softnet_seq_start,
2200         .next  = softnet_seq_next,
2201         .stop  = softnet_seq_stop,
2202         .show  = softnet_seq_show,
2203 };
2204
2205 static int softnet_seq_open(struct inode *inode, struct file *file)
2206 {
2207         return seq_open(file, &softnet_seq_ops);
2208 }
2209
2210 static const struct file_operations softnet_seq_fops = {
2211         .owner   = THIS_MODULE,
2212         .open    = softnet_seq_open,
2213         .read    = seq_read,
2214         .llseek  = seq_lseek,
2215         .release = seq_release,
2216 };
2217
2218 #ifdef CONFIG_WIRELESS_EXT
2219 extern int wireless_proc_init(void);
2220 #else
2221 #define wireless_proc_init() 0
2222 #endif
2223
2224 static int __init dev_proc_init(void)
2225 {
2226         int rc = -ENOMEM;
2227
2228         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2229                 goto out;
2230         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2231                 goto out_dev;
2232         if (wireless_proc_init())
2233                 goto out_softnet;
2234         rc = 0;
2235 out:
2236         return rc;
2237 out_softnet:
2238         proc_net_remove("softnet_stat");
2239 out_dev:
2240         proc_net_remove("dev");
2241         goto out;
2242 }
2243 #else
2244 #define dev_proc_init() 0
2245 #endif  /* CONFIG_PROC_FS */
2246
2247
2248 /**
2249  *      netdev_set_master       -       set up master/slave pair
2250  *      @slave: slave device
2251  *      @master: new master device
2252  *
2253  *      Changes the master device of the slave. Pass %NULL to break the
2254  *      bonding. The caller must hold the RTNL semaphore. On a failure
2255  *      a negative errno code is returned. On success the reference counts
2256  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2257  *      function returns zero.
2258  */
2259 int netdev_set_master(struct net_device *slave, struct net_device *master)
2260 {
2261         struct net_device *old = slave->master;
2262
2263         ASSERT_RTNL();
2264
2265         if (master) {
2266                 if (old)
2267                         return -EBUSY;
2268                 dev_hold(master);
2269         }
2270
2271         slave->master = master;
2272
2273         synchronize_net();
2274
2275         if (old)
2276                 dev_put(old);
2277
2278         if (master)
2279                 slave->flags |= IFF_SLAVE;
2280         else
2281                 slave->flags &= ~IFF_SLAVE;
2282
2283         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2284         return 0;
2285 }
2286
2287 /**
2288  *      dev_set_promiscuity     - update promiscuity count on a device
2289  *      @dev: device
2290  *      @inc: modifier
2291  *
2292  *      Add or remove promiscuity from a device. While the count in the device
2293  *      remains above zero the interface remains promiscuous. Once it hits zero
2294  *      the device reverts back to normal filtering operation. A negative inc
2295  *      value is used to drop promiscuity on the device.
2296  */
2297 void dev_set_promiscuity(struct net_device *dev, int inc)
2298 {
2299         unsigned short old_flags = dev->flags;
2300
2301         if ((dev->promiscuity += inc) == 0)
2302                 dev->flags &= ~IFF_PROMISC;
2303         else
2304                 dev->flags |= IFF_PROMISC;
2305         if (dev->flags != old_flags) {
2306                 dev_mc_upload(dev);
2307                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2308                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2309                                                                "left");
2310                 audit_log(current->audit_context, GFP_ATOMIC,
2311                         AUDIT_ANOM_PROMISCUOUS,
2312                         "dev=%s prom=%d old_prom=%d auid=%u",
2313                         dev->name, (dev->flags & IFF_PROMISC),
2314                         (old_flags & IFF_PROMISC),
2315                         audit_get_loginuid(current->audit_context));
2316         }
2317 }
2318
2319 /**
2320  *      dev_set_allmulti        - update allmulti count on a device
2321  *      @dev: device
2322  *      @inc: modifier
2323  *
2324  *      Add or remove reception of all multicast frames to a device. While the
2325  *      count in the device remains above zero the interface remains listening
2326  *      to all interfaces. Once it hits zero the device reverts back to normal
2327  *      filtering operation. A negative @inc value is used to drop the counter
2328  *      when releasing a resource needing all multicasts.
2329  */
2330
2331 void dev_set_allmulti(struct net_device *dev, int inc)
2332 {
2333         unsigned short old_flags = dev->flags;
2334
2335         dev->flags |= IFF_ALLMULTI;
2336         if ((dev->allmulti += inc) == 0)
2337                 dev->flags &= ~IFF_ALLMULTI;
2338         if (dev->flags ^ old_flags)
2339                 dev_mc_upload(dev);
2340 }
2341
2342 unsigned dev_get_flags(const struct net_device *dev)
2343 {
2344         unsigned flags;
2345
2346         flags = (dev->flags & ~(IFF_PROMISC |
2347                                 IFF_ALLMULTI |
2348                                 IFF_RUNNING |
2349                                 IFF_LOWER_UP |
2350                                 IFF_DORMANT)) |
2351                 (dev->gflags & (IFF_PROMISC |
2352                                 IFF_ALLMULTI));
2353
2354         if (netif_running(dev)) {
2355                 if (netif_oper_up(dev))
2356                         flags |= IFF_RUNNING;
2357                 if (netif_carrier_ok(dev))
2358                         flags |= IFF_LOWER_UP;
2359                 if (netif_dormant(dev))
2360                         flags |= IFF_DORMANT;
2361         }
2362
2363         return flags;
2364 }
2365
2366 int dev_change_flags(struct net_device *dev, unsigned flags)
2367 {
2368         int ret;
2369         int old_flags = dev->flags;
2370
2371         /*
2372          *      Set the flags on our device.
2373          */
2374
2375         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2376                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2377                                IFF_AUTOMEDIA)) |
2378                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2379                                     IFF_ALLMULTI));
2380
2381         /*
2382          *      Load in the correct multicast list now the flags have changed.
2383          */
2384
2385         dev_mc_upload(dev);
2386
2387         /*
2388          *      Have we downed the interface. We handle IFF_UP ourselves
2389          *      according to user attempts to set it, rather than blindly
2390          *      setting it.
2391          */
2392
2393         ret = 0;
2394         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2395                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2396
2397                 if (!ret)
2398                         dev_mc_upload(dev);
2399         }
2400
2401         if (dev->flags & IFF_UP &&
2402             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2403                                           IFF_VOLATILE)))
2404                 raw_notifier_call_chain(&netdev_chain,
2405                                 NETDEV_CHANGE, dev);
2406
2407         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2408                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2409                 dev->gflags ^= IFF_PROMISC;
2410                 dev_set_promiscuity(dev, inc);
2411         }
2412
2413         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2414            is important. Some (broken) drivers set IFF_PROMISC, when
2415            IFF_ALLMULTI is requested not asking us and not reporting.
2416          */
2417         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2418                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2419                 dev->gflags ^= IFF_ALLMULTI;
2420                 dev_set_allmulti(dev, inc);
2421         }
2422
2423         if (old_flags ^ dev->flags)
2424                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2425
2426         return ret;
2427 }
2428
2429 int dev_set_mtu(struct net_device *dev, int new_mtu)
2430 {
2431         int err;
2432
2433         if (new_mtu == dev->mtu)
2434                 return 0;
2435
2436         /*      MTU must be positive.    */
2437         if (new_mtu < 0)
2438                 return -EINVAL;
2439
2440         if (!netif_device_present(dev))
2441                 return -ENODEV;
2442
2443         err = 0;
2444         if (dev->change_mtu)
2445                 err = dev->change_mtu(dev, new_mtu);
2446         else
2447                 dev->mtu = new_mtu;
2448         if (!err && dev->flags & IFF_UP)
2449                 raw_notifier_call_chain(&netdev_chain,
2450                                 NETDEV_CHANGEMTU, dev);
2451         return err;
2452 }
2453
2454 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2455 {
2456         int err;
2457
2458         if (!dev->set_mac_address)
2459                 return -EOPNOTSUPP;
2460         if (sa->sa_family != dev->type)
2461                 return -EINVAL;
2462         if (!netif_device_present(dev))
2463                 return -ENODEV;
2464         err = dev->set_mac_address(dev, sa);
2465         if (!err)
2466                 raw_notifier_call_chain(&netdev_chain,
2467                                 NETDEV_CHANGEADDR, dev);
2468         return err;
2469 }
2470
2471 /*
2472  *      Perform the SIOCxIFxxx calls.
2473  */
2474 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2475 {
2476         int err;
2477         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2478
2479         if (!dev)
2480                 return -ENODEV;
2481
2482         switch (cmd) {
2483                 case SIOCGIFFLAGS:      /* Get interface flags */
2484                         ifr->ifr_flags = dev_get_flags(dev);
2485                         return 0;
2486
2487                 case SIOCSIFFLAGS:      /* Set interface flags */
2488                         return dev_change_flags(dev, ifr->ifr_flags);
2489
2490                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2491                                            (currently unused) */
2492                         ifr->ifr_metric = 0;
2493                         return 0;
2494
2495                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2496                                            (currently unused) */
2497                         return -EOPNOTSUPP;
2498
2499                 case SIOCGIFMTU:        /* Get the MTU of a device */
2500                         ifr->ifr_mtu = dev->mtu;
2501                         return 0;
2502
2503                 case SIOCSIFMTU:        /* Set the MTU of a device */
2504                         return dev_set_mtu(dev, ifr->ifr_mtu);
2505
2506                 case SIOCGIFHWADDR:
2507                         if (!dev->addr_len)
2508                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2509                         else
2510                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2511                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2512                         ifr->ifr_hwaddr.sa_family = dev->type;
2513                         return 0;
2514
2515                 case SIOCSIFHWADDR:
2516                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2517
2518                 case SIOCSIFHWBROADCAST:
2519                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2520                                 return -EINVAL;
2521                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2522                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2523                         raw_notifier_call_chain(&netdev_chain,
2524                                             NETDEV_CHANGEADDR, dev);
2525                         return 0;
2526
2527                 case SIOCGIFMAP:
2528                         ifr->ifr_map.mem_start = dev->mem_start;
2529                         ifr->ifr_map.mem_end   = dev->mem_end;
2530                         ifr->ifr_map.base_addr = dev->base_addr;
2531                         ifr->ifr_map.irq       = dev->irq;
2532                         ifr->ifr_map.dma       = dev->dma;
2533                         ifr->ifr_map.port      = dev->if_port;
2534                         return 0;
2535
2536                 case SIOCSIFMAP:
2537                         if (dev->set_config) {
2538                                 if (!netif_device_present(dev))
2539                                         return -ENODEV;
2540                                 return dev->set_config(dev, &ifr->ifr_map);
2541                         }
2542                         return -EOPNOTSUPP;
2543
2544                 case SIOCADDMULTI:
2545                         if (!dev->set_multicast_list ||
2546                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2547                                 return -EINVAL;
2548                         if (!netif_device_present(dev))
2549                                 return -ENODEV;
2550                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2551                                           dev->addr_len, 1);
2552
2553                 case SIOCDELMULTI:
2554                         if (!dev->set_multicast_list ||
2555                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2556                                 return -EINVAL;
2557                         if (!netif_device_present(dev))
2558                                 return -ENODEV;
2559                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2560                                              dev->addr_len, 1);
2561
2562                 case SIOCGIFINDEX:
2563                         ifr->ifr_ifindex = dev->ifindex;
2564                         return 0;
2565
2566                 case SIOCGIFTXQLEN:
2567                         ifr->ifr_qlen = dev->tx_queue_len;
2568                         return 0;
2569
2570                 case SIOCSIFTXQLEN:
2571                         if (ifr->ifr_qlen < 0)
2572                                 return -EINVAL;
2573                         dev->tx_queue_len = ifr->ifr_qlen;
2574                         return 0;
2575
2576                 case SIOCSIFNAME:
2577                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2578                         return dev_change_name(dev, ifr->ifr_newname);
2579
2580                 /*
2581                  *      Unknown or private ioctl
2582                  */
2583
2584                 default:
2585                         if ((cmd >= SIOCDEVPRIVATE &&
2586                             cmd <= SIOCDEVPRIVATE + 15) ||
2587                             cmd == SIOCBONDENSLAVE ||
2588                             cmd == SIOCBONDRELEASE ||
2589                             cmd == SIOCBONDSETHWADDR ||
2590                             cmd == SIOCBONDSLAVEINFOQUERY ||
2591                             cmd == SIOCBONDINFOQUERY ||
2592                             cmd == SIOCBONDCHANGEACTIVE ||
2593                             cmd == SIOCGMIIPHY ||
2594                             cmd == SIOCGMIIREG ||
2595                             cmd == SIOCSMIIREG ||
2596                             cmd == SIOCBRADDIF ||
2597                             cmd == SIOCBRDELIF ||
2598                             cmd == SIOCWANDEV) {
2599                                 err = -EOPNOTSUPP;
2600                                 if (dev->do_ioctl) {
2601                                         if (netif_device_present(dev))
2602                                                 err = dev->do_ioctl(dev, ifr,
2603                                                                     cmd);
2604                                         else
2605                                                 err = -ENODEV;
2606                                 }
2607                         } else
2608                                 err = -EINVAL;
2609
2610         }
2611         return err;
2612 }
2613
2614 /*
2615  *      This function handles all "interface"-type I/O control requests. The actual
2616  *      'doing' part of this is dev_ifsioc above.
2617  */
2618
2619 /**
2620  *      dev_ioctl       -       network device ioctl
2621  *      @cmd: command to issue
2622  *      @arg: pointer to a struct ifreq in user space
2623  *
2624  *      Issue ioctl functions to devices. This is normally called by the
2625  *      user space syscall interfaces but can sometimes be useful for
2626  *      other purposes. The return value is the return from the syscall if
2627  *      positive or a negative errno code on error.
2628  */
2629
2630 int dev_ioctl(unsigned int cmd, void __user *arg)
2631 {
2632         struct ifreq ifr;
2633         int ret;
2634         char *colon;
2635
2636         /* One special case: SIOCGIFCONF takes ifconf argument
2637            and requires shared lock, because it sleeps writing
2638            to user space.
2639          */
2640
2641         if (cmd == SIOCGIFCONF) {
2642                 rtnl_lock();
2643                 ret = dev_ifconf((char __user *) arg);
2644                 rtnl_unlock();
2645                 return ret;
2646         }
2647         if (cmd == SIOCGIFNAME)
2648                 return dev_ifname((struct ifreq __user *)arg);
2649
2650         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2651                 return -EFAULT;
2652
2653         ifr.ifr_name[IFNAMSIZ-1] = 0;
2654
2655         colon = strchr(ifr.ifr_name, ':');
2656         if (colon)
2657                 *colon = 0;
2658
2659         /*
2660          *      See which interface the caller is talking about.
2661          */
2662
2663         switch (cmd) {
2664                 /*
2665                  *      These ioctl calls:
2666                  *      - can be done by all.
2667                  *      - atomic and do not require locking.
2668                  *      - return a value
2669                  */
2670                 case SIOCGIFFLAGS:
2671                 case SIOCGIFMETRIC:
2672                 case SIOCGIFMTU:
2673                 case SIOCGIFHWADDR:
2674                 case SIOCGIFSLAVE:
2675                 case SIOCGIFMAP:
2676                 case SIOCGIFINDEX:
2677                 case SIOCGIFTXQLEN:
2678                         dev_load(ifr.ifr_name);
2679                         read_lock(&dev_base_lock);
2680                         ret = dev_ifsioc(&ifr, cmd);
2681                         read_unlock(&dev_base_lock);
2682                         if (!ret) {
2683                                 if (colon)
2684                                         *colon = ':';
2685                                 if (copy_to_user(arg, &ifr,
2686                                                  sizeof(struct ifreq)))
2687                                         ret = -EFAULT;
2688                         }
2689                         return ret;
2690
2691                 case SIOCETHTOOL:
2692                         dev_load(ifr.ifr_name);
2693                         rtnl_lock();
2694                         ret = dev_ethtool(&ifr);
2695                         rtnl_unlock();
2696                         if (!ret) {
2697                                 if (colon)
2698                                         *colon = ':';
2699                                 if (copy_to_user(arg, &ifr,
2700                                                  sizeof(struct ifreq)))
2701                                         ret = -EFAULT;
2702                         }
2703                         return ret;
2704
2705                 /*
2706                  *      These ioctl calls:
2707                  *      - require superuser power.
2708                  *      - require strict serialization.
2709                  *      - return a value
2710                  */
2711                 case SIOCGMIIPHY:
2712                 case SIOCGMIIREG:
2713                 case SIOCSIFNAME:
2714                         if (!capable(CAP_NET_ADMIN))
2715                                 return -EPERM;
2716                         dev_load(ifr.ifr_name);
2717                         rtnl_lock();
2718                         ret = dev_ifsioc(&ifr, cmd);
2719                         rtnl_unlock();
2720                         if (!ret) {
2721                                 if (colon)
2722                                         *colon = ':';
2723                                 if (copy_to_user(arg, &ifr,
2724                                                  sizeof(struct ifreq)))
2725                                         ret = -EFAULT;
2726                         }
2727                         return ret;
2728
2729                 /*
2730                  *      These ioctl calls:
2731                  *      - require superuser power.
2732                  *      - require strict serialization.
2733                  *      - do not return a value
2734                  */
2735                 case SIOCSIFFLAGS:
2736                 case SIOCSIFMETRIC:
2737                 case SIOCSIFMTU:
2738                 case SIOCSIFMAP:
2739                 case SIOCSIFHWADDR:
2740                 case SIOCSIFSLAVE:
2741                 case SIOCADDMULTI:
2742                 case SIOCDELMULTI:
2743                 case SIOCSIFHWBROADCAST:
2744                 case SIOCSIFTXQLEN:
2745                 case SIOCSMIIREG:
2746                 case SIOCBONDENSLAVE:
2747                 case SIOCBONDRELEASE:
2748                 case SIOCBONDSETHWADDR:
2749                 case SIOCBONDCHANGEACTIVE:
2750                 case SIOCBRADDIF:
2751                 case SIOCBRDELIF:
2752                         if (!capable(CAP_NET_ADMIN))
2753                                 return -EPERM;
2754                         /* fall through */
2755                 case SIOCBONDSLAVEINFOQUERY:
2756                 case SIOCBONDINFOQUERY:
2757                         dev_load(ifr.ifr_name);
2758                         rtnl_lock();
2759                         ret = dev_ifsioc(&ifr, cmd);
2760                         rtnl_unlock();
2761                         return ret;
2762
2763                 case SIOCGIFMEM:
2764                         /* Get the per device memory space. We can add this but
2765                          * currently do not support it */
2766                 case SIOCSIFMEM:
2767                         /* Set the per device memory buffer space.
2768                          * Not applicable in our case */
2769                 case SIOCSIFLINK:
2770                         return -EINVAL;
2771
2772                 /*
2773                  *      Unknown or private ioctl.
2774                  */
2775                 default:
2776                         if (cmd == SIOCWANDEV ||
2777                             (cmd >= SIOCDEVPRIVATE &&
2778                              cmd <= SIOCDEVPRIVATE + 15)) {
2779                                 dev_load(ifr.ifr_name);
2780                                 rtnl_lock();
2781                                 ret = dev_ifsioc(&ifr, cmd);
2782                                 rtnl_unlock();
2783                                 if (!ret && copy_to_user(arg, &ifr,
2784                                                          sizeof(struct ifreq)))
2785                                         ret = -EFAULT;
2786                                 return ret;
2787                         }
2788 #ifdef CONFIG_WIRELESS_EXT
2789                         /* Take care of Wireless Extensions */
2790                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2791                                 /* If command is `set a parameter', or
2792                                  * `get the encoding parameters', check if
2793                                  * the user has the right to do it */
2794                                 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2795                                     || cmd == SIOCGIWENCODEEXT) {
2796                                         if (!capable(CAP_NET_ADMIN))
2797                                                 return -EPERM;
2798                                 }
2799                                 dev_load(ifr.ifr_name);
2800                                 rtnl_lock();
2801                                 /* Follow me in net/core/wireless.c */
2802                                 ret = wireless_process_ioctl(&ifr, cmd);
2803                                 rtnl_unlock();
2804                                 if (IW_IS_GET(cmd) &&
2805                                     copy_to_user(arg, &ifr,
2806                                                  sizeof(struct ifreq)))
2807                                         ret = -EFAULT;
2808                                 return ret;
2809                         }
2810 #endif  /* CONFIG_WIRELESS_EXT */
2811                         return -EINVAL;
2812         }
2813 }
2814
2815
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Advances a function-local static counter, restarting it at 1
 *	whenever it would become zero or negative, until it lands on a
 *	value for which __dev_get_by_index() finds no device, and returns
 *	that value.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure the result remains unique.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		ifindex++;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2833
2834 static int dev_boot_phase = 1;
2835
2836 /* Delayed registration/unregisteration */
2837 static DEFINE_SPINLOCK(net_todo_list_lock);
2838 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2839
2840 static void net_set_todo(struct net_device *dev)
2841 {
2842         spin_lock(&net_todo_list_lock);
2843         list_add_tail(&dev->todo_list, &net_todo_list);
2844         spin_unlock(&net_todo_list_lock);
2845 }
2846
2847 /**
2848  *      register_netdevice      - register a network device
2849  *      @dev: device to register
2850  *
2851  *      Take a completed network device structure and add it to the kernel
2852  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2853  *      chain. 0 is returned on success. A negative errno code is returned
2854  *      on a failure to set up the device, or if the name is a duplicate.
2855  *
2856  *      Callers must hold the rtnl semaphore. You may want
2857  *      register_netdev() instead of this.
2858  *
2859  *      BUGS:
2860  *      The locking appears insufficient to guarantee two parallel registers
2861  *      will not get the same name.
2862  */
2863
2864 int register_netdevice(struct net_device *dev)
2865 {
2866         struct hlist_head *head;
2867         struct hlist_node *p;
2868         int ret;
2869
2870         BUG_ON(dev_boot_phase);
2871         ASSERT_RTNL();
2872
2873         might_sleep();
2874
2875         /* When net_device's are persistent, this will be fatal. */
2876         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2877
2878         spin_lock_init(&dev->queue_lock);
2879         spin_lock_init(&dev->_xmit_lock);
2880         dev->xmit_lock_owner = -1;
2881 #ifdef CONFIG_NET_CLS_ACT
2882         spin_lock_init(&dev->ingress_lock);
2883 #endif
2884
2885         dev->iflink = -1;
2886
2887         /* Init, if this function is available */
2888         if (dev->init) {
2889                 ret = dev->init(dev);
2890                 if (ret) {
2891                         if (ret > 0)
2892                                 ret = -EIO;
2893                         goto out;
2894                 }
2895         }
2896
2897         if (!dev_valid_name(dev->name)) {
2898                 ret = -EINVAL;
2899                 goto out;
2900         }
2901
2902         dev->ifindex = dev_new_index();
2903         if (dev->iflink == -1)
2904                 dev->iflink = dev->ifindex;
2905
2906         /* Check for existence of name */
2907         head = dev_name_hash(dev->name);
2908         hlist_for_each(p, head) {
2909                 struct net_device *d
2910                         = hlist_entry(p, struct net_device, name_hlist);
2911                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2912                         ret = -EEXIST;
2913                         goto out;
2914                 }
2915         }
2916
2917         /* Fix illegal SG+CSUM combinations. */
2918         if ((dev->features & NETIF_F_SG) &&
2919             !(dev->features & NETIF_F_ALL_CSUM)) {
2920                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
2921                        dev->name);
2922                 dev->features &= ~NETIF_F_SG;
2923         }
2924
2925         /* TSO requires that SG is present as well. */
2926         if ((dev->features & NETIF_F_TSO) &&
2927             !(dev->features & NETIF_F_SG)) {
2928                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
2929                        dev->name);
2930                 dev->features &= ~NETIF_F_TSO;
2931         }
2932         if (dev->features & NETIF_F_UFO) {
2933                 if (!(dev->features & NETIF_F_HW_CSUM)) {
2934                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2935                                         "NETIF_F_HW_CSUM feature.\n",
2936                                                         dev->name);
2937                         dev->features &= ~NETIF_F_UFO;
2938                 }
2939                 if (!(dev->features & NETIF_F_SG)) {
2940                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2941                                         "NETIF_F_SG feature.\n",
2942                                         dev->name);
2943                         dev->features &= ~NETIF_F_UFO;
2944                 }
2945         }
2946
2947         /*
2948          *      nil rebuild_header routine,
2949          *      that should be never called and used as just bug trap.
2950          */
2951
2952         if (!dev->rebuild_header)
2953                 dev->rebuild_header = default_rebuild_header;
2954
2955         ret = netdev_register_sysfs(dev);
2956         if (ret)
2957                 goto out;
2958         dev->reg_state = NETREG_REGISTERED;
2959
2960         /*
2961          *      Default initial state at registry is that the
2962          *      device is present.
2963          */
2964
2965         set_bit(__LINK_STATE_PRESENT, &dev->state);
2966
2967         dev->next = NULL;
2968         dev_init_scheduler(dev);
2969         write_lock_bh(&dev_base_lock);
2970         *dev_tail = dev;
2971         dev_tail = &dev->next;
2972         hlist_add_head(&dev->name_hlist, head);
2973         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2974         dev_hold(dev);
2975         write_unlock_bh(&dev_base_lock);
2976
2977         /* Notify protocols, that a new device appeared. */
2978         raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2979
2980         ret = 0;
2981
2982 out:
2983         return ret;
2984 }
2985
2986 /**
2987  *      register_netdev - register a network device
2988  *      @dev: device to register
2989  *
2990  *      Take a completed network device structure and add it to the kernel
2991  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2992  *      chain. 0 is returned on success. A negative errno code is returned
2993  *      on a failure to set up the device, or if the name is a duplicate.
2994  *
2995  *      This is a wrapper around register_netdev that takes the rtnl semaphore
2996  *      and expands the device name if you passed a format string to
2997  *      alloc_netdev.
2998  */
2999 int register_netdev(struct net_device *dev)
3000 {
3001         int err;
3002
3003         rtnl_lock();
3004
3005         /*
3006          * If the name is a format string the caller wants us to do a
3007          * name allocation.
3008          */
3009         if (strchr(dev->name, '%')) {
3010                 err = dev_alloc_name(dev, dev->name);
3011                 if (err < 0)
3012                         goto out;
3013         }
3014
3015         err = register_netdevice(dev);
3016 out:
3017         rtnl_unlock();
3018         return err;
3019 }
3020 EXPORT_SYMBOL(register_netdev);
3021
3022 /*
3023  * netdev_wait_allrefs - wait until all references are gone.
3024  *
3025  * This is called when unregistering network devices.
3026  *
3027  * Any protocol or device that holds a reference should register
3028  * for netdevice notification, and cleanup and put back the
3029  * reference if they receive an UNREGISTER event.
3030  * We can get stuck here if buggy protocols don't correctly
3031  * call dev_put.
3032  */
3033 static void netdev_wait_allrefs(struct net_device *dev)
3034 {
3035         unsigned long rebroadcast_time, warning_time;
3036
3037         rebroadcast_time = warning_time = jiffies;
3038         while (atomic_read(&dev->refcnt) != 0) {
3039                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3040                         rtnl_lock();
3041
3042                         /* Rebroadcast unregister notification */
3043                         raw_notifier_call_chain(&netdev_chain,
3044                                             NETDEV_UNREGISTER, dev);
3045
3046                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3047                                      &dev->state)) {
3048                                 /* We must not have linkwatch events
3049                                  * pending on unregister. If this
3050                                  * happens, we simply run the queue
3051                                  * unscheduled, resulting in a noop
3052                                  * for this device.
3053                                  */
3054                                 linkwatch_run_queue();
3055                         }
3056
3057                         __rtnl_unlock();
3058
3059                         rebroadcast_time = jiffies;
3060                 }
3061
3062                 msleep(250);
3063
3064                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3065                         printk(KERN_EMERG "unregister_netdevice: "
3066                                "waiting for %s to become free. Usage "
3067                                "count = %d\n",
3068                                dev->name, atomic_read(&dev->refcnt));
3069                         warning_time = jiffies;
3070                 }
3071         }
3072 }
3073
3074 /* The sequence is:
3075  *
3076  *      rtnl_lock();
3077  *      ...
3078  *      register_netdevice(x1);
3079  *      register_netdevice(x2);
3080  *      ...
3081  *      unregister_netdevice(y1);
3082  *      unregister_netdevice(y2);
3083  *      ...
3084  *      rtnl_unlock();
3085  *      free_netdev(y1);
3086  *      free_netdev(y2);
3087  *
3088  * We are invoked by rtnl_unlock() after it drops the semaphore.
3089  * This allows us to deal with problems:
3090  * 1) We can delete sysfs objects which invoke hotplug
3091  *    without deadlocking with linkwatch via keventd.
3092  * 2) Since we run with the RTNL semaphore not held, we can sleep
3093  *    safely in order to wait for the netdev refcnt to drop to zero.
3094  */
3095 static DEFINE_MUTEX(net_todo_run_mutex);
3096 void netdev_run_todo(void)
3097 {
3098         struct list_head list;
3099
3100         /* Need to guard against multiple cpu's getting out of order. */
3101         mutex_lock(&net_todo_run_mutex);
3102
3103         /* Not safe to do outside the semaphore.  We must not return
3104          * until all unregister events invoked by the local processor
3105          * have been completed (either by this todo run, or one on
3106          * another cpu).
3107          */
3108         if (list_empty(&net_todo_list))
3109                 goto out;
3110
3111         /* Snapshot list, allow later requests */
3112         spin_lock(&net_todo_list_lock);
3113         list_replace_init(&net_todo_list, &list);
3114         spin_unlock(&net_todo_list_lock);
3115
3116         while (!list_empty(&list)) {
3117                 struct net_device *dev
3118                         = list_entry(list.next, struct net_device, todo_list);
3119                 list_del(&dev->todo_list);
3120
3121                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3122                         printk(KERN_ERR "network todo '%s' but state %d\n",
3123                                dev->name, dev->reg_state);
3124                         dump_stack();
3125                         continue;
3126                 }
3127
3128                 netdev_unregister_sysfs(dev);
3129                 dev->reg_state = NETREG_UNREGISTERED;
3130
3131                 netdev_wait_allrefs(dev);
3132
3133                 /* paranoia */
3134                 BUG_ON(atomic_read(&dev->refcnt));
3135                 BUG_TRAP(!dev->ip_ptr);
3136                 BUG_TRAP(!dev->ip6_ptr);
3137                 BUG_TRAP(!dev->dn_ptr);
3138
3139                 /* It must be the very last action,
3140                  * after this 'dev' may point to freed up memory.
3141                  */
3142                 if (dev->destructor)
3143                         dev->destructor(dev);
3144         }
3145
3146 out:
3147         mutex_unlock(&net_todo_run_mutex);
3148 }
3149
3150 /**
3151  *      alloc_netdev - allocate network device
3152  *      @sizeof_priv:   size of private data to allocate space for
3153  *      @name:          device name format string
3154  *      @setup:         callback to initialize device
3155  *
3156  *      Allocates a struct net_device with private data area for driver use
3157  *      and performs basic initialization.
3158  */
3159 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3160                 void (*setup)(struct net_device *))
3161 {
3162         void *p;
3163         struct net_device *dev;
3164         int alloc_size;
3165
3166         BUG_ON(strlen(name) >= sizeof(dev->name));
3167
3168         /* ensure 32-byte alignment of both the device and private area */
3169         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3170         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3171
3172         p = kzalloc(alloc_size, GFP_KERNEL);
3173         if (!p) {
3174                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3175                 return NULL;
3176         }
3177
3178         dev = (struct net_device *)
3179                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3180         dev->padded = (char *)dev - (char *)p;
3181
3182         if (sizeof_priv)
3183                 dev->priv = netdev_priv(dev);
3184
3185         setup(dev);
3186         strcpy(dev->name, name);
3187         return dev;
3188 }
3189 EXPORT_SYMBOL(alloc_netdev);
3190
3191 /**
3192  *      free_netdev - free network device
3193  *      @dev: device
3194  *
3195  *      This function does the last stage of destroying an allocated device
3196  *      interface. The reference to the device object is released.
3197  *      If this is the last reference then it will be freed.
3198  */
3199 void free_netdev(struct net_device *dev)
3200 {
3201 #ifdef CONFIG_SYSFS
3202         /*  Compatibility with error handling in drivers */
3203         if (dev->reg_state == NETREG_UNINITIALIZED) {
3204                 kfree((char *)dev - dev->padded);
3205                 return;
3206         }
3207
3208         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3209         dev->reg_state = NETREG_RELEASED;
3210
3211         /* will free via device release */
3212         put_device(&dev->dev);
3213 #else
3214         kfree((char *)dev - dev->padded);
3215 #endif
3216 }
3217
/*
 * Synchronize with packet receive processing: flags that the caller
 * may sleep, then waits in synchronize_rcu().
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3224
3225 /**
3226  *      unregister_netdevice - remove device from the kernel
3227  *      @dev: device
3228  *
3229  *      This function shuts down a device interface and removes it
3230  *      from the kernel tables. On success 0 is returned, on a failure
3231  *      a negative errno code is returned.
3232  *
3233  *      Callers must hold the rtnl semaphore.  You may want
3234  *      unregister_netdev() instead of this.
3235  */
3236
3237 void unregister_netdevice(struct net_device *dev)
3238 {
3239         struct net_device *d, **dp;
3240
3241         BUG_ON(dev_boot_phase);
3242         ASSERT_RTNL();
3243
3244         /* Some devices call without registering for initialization unwind. */
3245         if (dev->reg_state == NETREG_UNINITIALIZED) {
3246                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3247                                   "was registered\n", dev->name, dev);
3248
3249                 WARN_ON(1);
3250                 return;
3251         }
3252
3253         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3254
3255         /* If device is running, close it first. */
3256         if (dev->flags & IFF_UP)
3257                 dev_close(dev);
3258
3259         /* And unlink it from device chain. */
3260         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3261                 if (d == dev) {
3262                         write_lock_bh(&dev_base_lock);
3263                         hlist_del(&dev->name_hlist);
3264                         hlist_del(&dev->index_hlist);
3265                         if (dev_tail == &dev->next)
3266                                 dev_tail = dp;
3267                         *dp = d->next;
3268                         write_unlock_bh(&dev_base_lock);
3269                         break;
3270                 }
3271         }
3272         BUG_ON(!d);
3273
3274         dev->reg_state = NETREG_UNREGISTERING;
3275
3276         synchronize_net();
3277
3278         /* Shutdown queueing discipline. */
3279         dev_shutdown(dev);
3280
3281
3282         /* Notify protocols, that we are about to destroy
3283            this device. They should clean all the things.
3284         */
3285         raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3286
3287         /*
3288          *      Flush the multicast chain
3289          */
3290         dev_mc_discard(dev);
3291
3292         if (dev->uninit)
3293                 dev->uninit(dev);
3294
3295         /* Notifier chain MUST detach us from master device. */
3296         BUG_TRAP(!dev->master);
3297
3298         /* Finish processing unregister after unlock */
3299         net_set_todo(dev);
3300
3301         synchronize_net();
3302
3303         dev_put(dev);
3304 }
3305
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  It returns nothing.
 *
 *	This is just a wrapper for unregister_netdevice() that takes
 *	the rtnl semaphore around the call.  In general you want to use
 *	this and not unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3326
3327 static int dev_cpu_callback(struct notifier_block *nfb,
3328                             unsigned long action,
3329                             void *ocpu)
3330 {
3331         struct sk_buff **list_skb;
3332         struct net_device **list_net;
3333         struct sk_buff *skb;
3334         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3335         struct softnet_data *sd, *oldsd;
3336
3337         if (action != CPU_DEAD)
3338                 return NOTIFY_OK;
3339
3340         local_irq_disable();
3341         cpu = smp_processor_id();
3342         sd = &per_cpu(softnet_data, cpu);
3343         oldsd = &per_cpu(softnet_data, oldcpu);
3344
3345         /* Find end of our completion_queue. */
3346         list_skb = &sd->completion_queue;
3347         while (*list_skb)
3348                 list_skb = &(*list_skb)->next;
3349         /* Append completion queue from offline CPU. */
3350         *list_skb = oldsd->completion_queue;
3351         oldsd->completion_queue = NULL;
3352
3353         /* Find end of our output_queue. */
3354         list_net = &sd->output_queue;
3355         while (*list_net)
3356                 list_net = &(*list_net)->next_sched;
3357         /* Append output queue from offline CPU. */
3358         *list_net = oldsd->output_queue;
3359         oldsd->output_queue = NULL;
3360
3361         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3362         local_irq_enable();
3363
3364         /* Process offline CPU's input_pkt_queue */
3365         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3366                 netif_rx(skb);
3367
3368         return NOTIFY_OK;
3369 }
3370
3371 #ifdef CONFIG_NET_DMA
3372 /**
3373  * net_dma_rebalance -
3374  * This is called when the number of channels allocated to the net_dma_client
3375  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3376  */
3377 static void net_dma_rebalance(void)
3378 {
3379         unsigned int cpu, i, n;
3380         struct dma_chan *chan;
3381
3382         if (net_dma_count == 0) {
3383                 for_each_online_cpu(cpu)
3384                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3385                 return;
3386         }
3387
3388         i = 0;
3389         cpu = first_cpu(cpu_online_map);
3390
3391         rcu_read_lock();
3392         list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3393                 n = ((num_online_cpus() / net_dma_count)
3394                    + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3395
3396                 while(n) {
3397                         per_cpu(softnet_data, cpu).net_dma = chan;
3398                         cpu = next_cpu(cpu, cpu_online_map);
3399                         n--;
3400                 }
3401                 i++;
3402         }
3403         rcu_read_unlock();
3404 }
3405
3406 /**
3407  * netdev_dma_event - event callback for the net_dma_client
3408  * @client: should always be net_dma_client
3409  * @chan: DMA channel for the event
3410  * @event: event type
3411  */
3412 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3413         enum dma_event event)
3414 {
3415         spin_lock(&net_dma_event_lock);
3416         switch (event) {
3417         case DMA_RESOURCE_ADDED:
3418                 net_dma_count++;
3419                 net_dma_rebalance();
3420                 break;
3421         case DMA_RESOURCE_REMOVED:
3422                 net_dma_count--;
3423                 net_dma_rebalance();
3424                 break;
3425         default:
3426                 break;
3427         }
3428         spin_unlock(&net_dma_event_lock);
3429 }
3430
3431 /**
3432  * netdev_dma_regiser - register the networking subsystem as a DMA client
3433  */
3434 static int __init netdev_dma_register(void)
3435 {
3436         spin_lock_init(&net_dma_event_lock);
3437         net_dma_client = dma_async_client_register(netdev_dma_event);
3438         if (net_dma_client == NULL)
3439                 return -ENOMEM;
3440
3441         dma_async_client_chan_request(net_dma_client, num_online_cpus());
3442         return 0;
3443 }
3444
3445 #else
3446 static int __init netdev_dma_register(void) { return -ENODEV; }
3447 #endif /* CONFIG_NET_DMA */
3448
3449 /*
3450  *      Initialize the DEV module. At boot time this walks the device list and
3451  *      unhooks any devices that fail to initialise (normally hardware not
3452  *      present) and leaves us with a valid list of present and active devices.
3453  *
3454  */
3455
3456 /*
3457  *       This is called single threaded during boot, so no need
3458  *       to take the rtnl semaphore.
3459  */
3460 static int __init net_dev_init(void)
3461 {
3462         int i, rc = -ENOMEM;
3463
3464         BUG_ON(!dev_boot_phase);
3465
3466         if (dev_proc_init())
3467                 goto out;
3468
3469         if (netdev_sysfs_init())
3470                 goto out;
3471
3472         INIT_LIST_HEAD(&ptype_all);
3473         for (i = 0; i < 16; i++)
3474                 INIT_LIST_HEAD(&ptype_base[i]);
3475
3476         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3477                 INIT_HLIST_HEAD(&dev_name_head[i]);
3478
3479         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3480                 INIT_HLIST_HEAD(&dev_index_head[i]);
3481
3482         /*
3483          *      Initialise the packet receive queues.
3484          */
3485
3486         for_each_possible_cpu(i) {
3487                 struct softnet_data *queue;
3488
3489                 queue = &per_cpu(softnet_data, i);
3490                 skb_queue_head_init(&queue->input_pkt_queue);
3491                 queue->completion_queue = NULL;
3492                 INIT_LIST_HEAD(&queue->poll_list);
3493                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3494                 queue->backlog_dev.weight = weight_p;
3495                 queue->backlog_dev.poll = process_backlog;
3496                 atomic_set(&queue->backlog_dev.refcnt, 1);
3497         }
3498
3499         netdev_dma_register();
3500
3501         dev_boot_phase = 0;
3502
3503         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3504         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3505
3506         hotcpu_notifier(dev_cpu_callback, 0);
3507         dst_init();
3508         dev_mcast_init();
3509         rc = 0;
3510 out:
3511         return rc;
3512 }
3513
3514 subsys_initcall(net_dev_init);
3515
/* Symbols exported from this file, kept in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks: exported only when bridging is built in or as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
EXPORT_SYMBOL(br_handle_frame_hook);
#endif

/* dev_load is exported only under CONFIG_KMOD. */
#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

/* Per-CPU softnet_data is exported as a per-CPU symbol. */
EXPORT_PER_CPU_SYMBOL(softnet_data);