2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
56 #include <linux/sysctl.h>
58 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
/*
 * System-wide IPv4 device configuration; devinet_init_net() registers it
 * under the sysctl name "all".  The visible initialisers are indexed by
 * NET_IPV4_CONF_* minus one and switch on accept_redirects,
 * send_redirects, secure_redirects and shared_media.
 */
67 struct ipv4_devconf ipv4_devconf = {
69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
/*
 * Default per-device configuration.  inetdev_init() copies it into every
 * new in_device, and devinet_init_net() registers it under the sysctl
 * name "default".  Compared with ipv4_devconf it additionally enables
 * accept_source_route.
 */
76 static struct ipv4_devconf ipv4_devconf_dflt = {
78 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/* Access attribute 'attr' of the default configuration (ipv4_devconf_dflt). */
86 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() for IPv4 address
 * messages: the four address attributes are 32-bit values and the label
 * is a string of at most IFNAMSIZ - 1 characters.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 [IFA_LOCAL] = { .type = NLA_U32 },
90 [IFA_ADDRESS] = { .type = NLA_U32 },
91 [IFA_BROADCAST] = { .type = NLA_U32 },
92 [IFA_ANYCAST] = { .type = NLA_U32 },
93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down, plus
 * inetaddr_chain, the blocking notifier chain that
 * register_inetaddr_notifier() adds listeners to.
 *
 * NOTE(review): devinet_sysctl_register/unregister appear both as
 * prototypes and as static inline definitions; presumably a CONFIG_SYSCTL
 * conditional that is not visible in this listing selects between them --
 * confirm against the full file.
 */
96 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
99 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
102 static void devinet_sysctl_register(struct in_device *idev);
103 static void devinet_sysctl_unregister(struct in_device *idev);
105 static inline void devinet_sysctl_register(struct in_device *idev)
108 static inline void devinet_sysctl_unregister(struct in_device *idev)
113 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled in_ifaddr with GFP_KERNEL (so the caller must be
 * allowed to sleep) and initialise its RCU head.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing.
 */
115 static struct in_ifaddr *inet_alloc_ifa(void)
117 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
120 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(): recover the in_ifaddr that
 * embeds 'head' and drop the reference it holds on its in_device.
 */
126 static void inet_rcu_free_ifa(struct rcu_head *head)
128 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130 in_dev_put(ifa->ifa_dev);
/* Release an address through call_rcu(), with inet_rcu_free_ifa() as the callback. */
134 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown of an in_device.  Traps if the address list or the
 * multicast list is still populated, and logs the device when
 * NET_REFCNT_DEBUG is defined.
 * NOTE(review): the condition guarding the "Freeing alive in_device"
 * message and the actual release of memory are not visible here.
 */
139 void in_dev_finish_destroy(struct in_device *idev)
141 struct net_device *dev = idev->dev;
143 BUG_TRAP(!idev->ifa_list);
144 BUG_TRAP(!idev->mc_list);
145 #ifdef NET_REFCNT_DEBUG
146 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
147 idev, dev ? dev->name : "NIL");
151 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 state (struct in_device) for a network device.
 *
 * Visible steps: allocate a zeroed in_device, seed in_dev->cnf from
 * ipv4_devconf_dflt and clear its sysctl pointer, allocate ARP neighbour
 * parameters, register the sysctl entries, initialise multicast state,
 * and finally publish the structure through dev->ip_ptr with
 * rcu_assign_pointer().  Publication is last because packets can be
 * received as soon as ip_ptr is set.
 *
 * NOTE(review): the error paths, the reference-taking statements under
 * the two "reference" comments and the body of the IFF_UP branch are not
 * visible in this listing.
 */
157 static struct in_device *inetdev_init(struct net_device *dev)
159 struct in_device *in_dev;
163 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
166 INIT_RCU_HEAD(&in_dev->rcu_head);
167 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
168 in_dev->cnf.sysctl = NULL;
170 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
172 /* Reference in_dev->dev */
174 /* Account for reference dev->ip_ptr (below) */
177 devinet_sysctl_register(in_dev);
178 ip_mc_init_dev(in_dev);
179 if (dev->flags & IFF_UP)
182 /* we can receive as soon as ip_ptr is set -- do this last */
183 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(): recover the in_device that
 * embeds 'head'.
 * NOTE(review): the statement acting on idev is not visible here; the
 * name suggests it drops a reference -- confirm.
 */
192 static void in_dev_rcu_put(struct rcu_head *head)
194 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device: destroy multicast state, delete
 * addresses from the head of ifa_list until the list is empty, unregister
 * the sysctl entries, release the ARP parameters and hand the structure
 * to call_rcu() with in_dev_rcu_put() as the callback.
 * NOTE(review): several statements (including whatever detaches the
 * structure from the device) are not visible in this listing.
 */
198 static void inetdev_destroy(struct in_device *in_dev)
200 struct in_ifaddr *ifa;
201 struct net_device *dev;
209 ip_mc_destroy_dev(in_dev);
211 while ((ifa = in_dev->ifa_list) != NULL) {
212 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
218 devinet_sysctl_unregister(in_dev);
219 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of in_dev looking for one whose subnet
 * contains 'a' and, when 'b' is non-zero, also contains 'b'.
 * NOTE(review): the return statements are not visible in this listing.
 */
225 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 for_primary_ifa(in_dev) {
229 if (inet_ifa_match(a, ifa)) {
230 if (!b || inet_ifa_match(b, ifa)) {
235 } endfor_ifa(in_dev);
/*
 * Unlink the address at *ifap from in_dev and announce the removal.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link that currently points at the address to remove
 * @destroy: NOTE(review): its use is not visible in this listing
 * @nlh/@pid: netlink request context passed through to rtmsg_ifa()
 *
 * Visible behaviour: when the address is a primary, the following
 * entries are scanned; secondaries with the same mask that lie in the
 * removed address's subnet are unlinked and announced with RTM_DELADDR
 * and the notifier chain, unless they are kept for promotion (the
 * promotion decision itself is on lines not shown).  The address is then
 * unlinked, RTM_DELADDR is sent, and only afterwards is the NETDEV_DOWN
 * notifier run; the in-body comment explains why that order matters.  If
 * a secondary is promoted it is re-linked after the last primary, loses
 * IFA_F_SECONDARY and is announced with RTM_NEWADDR.
 */
240 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
241 int destroy, struct nlmsghdr *nlh, u32 pid)
243 struct in_ifaddr *promote = NULL;
244 struct in_ifaddr *ifa, *ifa1 = *ifap;
245 struct in_ifaddr *last_prim = in_dev->ifa_list;
246 struct in_ifaddr *prev_prom = NULL;
247 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
251 /* 1. Deleting primary ifaddr forces deletion of all secondaries
252 * unless alias promotion is set
255 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
256 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258 while ((ifa = *ifap1) != NULL) {
259 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
260 ifa1->ifa_scope <= ifa->ifa_scope)
263 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
264 ifa1->ifa_mask != ifa->ifa_mask ||
265 !inet_ifa_match(ifa1->ifa_address, ifa)) {
266 ifap1 = &ifa->ifa_next;
272 *ifap1 = ifa->ifa_next;
274 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
275 blocking_notifier_call_chain(&inetaddr_chain,
287 *ifap = ifa1->ifa_next;
289 /* 3. Announce address deletion */
291 /* Send message first, then call notifier.
292 At first sight, FIB update triggered by notifier
293 will refer to already deleted ifaddr, that could confuse
294 netlink listeners. It is not true: look, gated sees
295 that route deleted and if it still thinks that ifaddr
296 is valid, it will try to restore deleted routes... Grr.
297 So that, this order is correct.
299 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
300 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
305 prev_prom->ifa_next = promote->ifa_next;
306 promote->ifa_next = last_prim->ifa_next;
307 last_prim->ifa_next = promote;
310 promote->ifa_flags &= ~IFA_F_SECONDARY;
311 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
312 blocking_notifier_call_chain(&inetaddr_chain,
314 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
315 if (ifa1->ifa_mask != ifa->ifa_mask ||
316 !inet_ifa_match(ifa1->ifa_address, ifa))
/* Convenience wrapper: delete an address with no netlink request context (nlh NULL, pid 0). */
326 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link a prepared address into its device's list and announce it.
 *
 * Visible behaviour: an address without ifa_local gets special handling
 * (not shown).  The address starts out as a primary; the list walk
 * remembers the link after the last primary whose scope is not narrower,
 * and an existing entry with the same mask covering the same subnet
 * either triggers the duplicate-local / scope-mismatch branches (their
 * bodies are not shown) or causes the new address to be flagged
 * IFA_F_SECONDARY.  A primary seeds the network random generator with
 * its local address.  After linking, RTM_NEWADDR is sent first and the
 * NETDEV_UP notifier is called second so that netlink listeners already
 * know the address when the FIB update happens.
 */
332 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335 struct in_device *in_dev = ifa->ifa_dev;
336 struct in_ifaddr *ifa1, **ifap, **last_primary;
340 if (!ifa->ifa_local) {
345 ifa->ifa_flags &= ~IFA_F_SECONDARY;
346 last_primary = &in_dev->ifa_list;
348 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
349 ifap = &ifa1->ifa_next) {
350 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
351 ifa->ifa_scope <= ifa1->ifa_scope)
352 last_primary = &ifa1->ifa_next;
353 if (ifa1->ifa_mask == ifa->ifa_mask &&
354 inet_ifa_match(ifa1->ifa_address, ifa)) {
355 if (ifa1->ifa_local == ifa->ifa_local) {
359 if (ifa1->ifa_scope != ifa->ifa_scope) {
363 ifa->ifa_flags |= IFA_F_SECONDARY;
367 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
368 net_srandom(ifa->ifa_local);
372 ifa->ifa_next = *ifap;
375 /* Send message first, then call notifier.
376 Notifier will trigger FIB update, so that
377 listeners of netlink will know about new ifaddr */
378 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
379 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/* Convenience wrapper: insert an address with no netlink request context (nlh NULL, pid 0). */
384 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach 'ifa' to the IPv4 state of 'dev' and insert it.
 * Marks all configuration entries of the device as set, binds the
 * address to the device's in_device if it is not bound already (it must
 * not be bound to a different one), gives loopback addresses host scope
 * and returns the result of inet_insert_ifa().
 * NOTE(review): handling of a missing in_device is not visible here.
 */
389 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 struct in_device *in_dev = __in_dev_get_rtnl(dev);
399 ipv4_devconf_setall(in_dev);
400 if (ifa->ifa_dev != in_dev) {
401 BUG_TRAP(!ifa->ifa_dev);
403 ifa->ifa_dev = in_dev;
405 if (LOOPBACK(ifa->ifa_local))
406 ifa->ifa_scope = RT_SCOPE_HOST;
407 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index 'ifindex' in
 * init_net.  The lookup runs under the dev_base_lock read lock and
 * obtains the in_device via in_dev_get(); inet_rtm_deladdr() balances
 * that with __in_dev_put().  in_dev starts out NULL.
 * NOTE(review): the check on 'dev' and the return statement are not
 * visible in this listing.
 */
410 struct in_device *inetdev_by_index(int ifindex)
412 struct net_device *dev;
413 struct in_device *in_dev = NULL;
414 read_lock(&dev_base_lock);
415 dev = __dev_get_by_index(&init_net, ifindex);
417 in_dev = in_dev_get(dev);
418 read_unlock(&dev_base_lock);
422 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of in_dev for one whose mask equals
 * 'mask' and whose subnet contains 'prefix'.
 * NOTE(review): the return statements are not visible in this listing.
 */
424 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
429 for_primary_ifa(in_dev) {
430 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 } endfor_ifa(in_dev);
/*
 * rtnetlink handler for RTM_DELADDR (registered in devinet_init()).
 *
 * Only init_net is handled.  The request is parsed against
 * ifa_ipv4_policy, the in_device is looked up by interface index, and
 * the address list is scanned for an entry matching every supplied
 * selector: IFA_LOCAL (exact local address), IFA_LABEL (label string)
 * and IFA_ADDRESS (same prefix length and same subnet).  The first match
 * is removed with __inet_del_ifa(); -EADDRNOTAVAIL is the error set when
 * nothing matched.
 */
436 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 struct net *net = skb->sk->sk_net;
439 struct nlattr *tb[IFA_MAX+1];
440 struct in_device *in_dev;
441 struct ifaddrmsg *ifm;
442 struct in_ifaddr *ifa, **ifap;
447 if (net != &init_net)
450 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
454 ifm = nlmsg_data(nlh);
455 in_dev = inetdev_by_index(ifm->ifa_index);
456 if (in_dev == NULL) {
461 __in_dev_put(in_dev);
463 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
464 ifap = &ifa->ifa_next) {
466 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472 if (tb[IFA_ADDRESS] &&
473 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
474 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
481 err = -EADDRNOTAVAIL;
/*
 * Build an in_ifaddr from an RTM_NEWADDR request.
 *
 * The request is parsed against ifa_ipv4_policy; a prefix length above
 * 32 or a missing IFA_LOCAL takes the rejection branch.  The device is
 * looked up by index in init_net and its in_device fetched.  When
 * IFA_ADDRESS is absent it defaults to IFA_LOCAL.  Prefix length, mask,
 * flags, scope, local/peer address and the optional broadcast and
 * anycast addresses are copied from the message; the label comes from
 * IFA_LABEL or else from the device name.
 * NOTE(review): the error codes and the return convention are on lines
 * not visible in this listing.
 */
486 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
488 struct nlattr *tb[IFA_MAX+1];
489 struct in_ifaddr *ifa;
490 struct ifaddrmsg *ifm;
491 struct net_device *dev;
492 struct in_device *in_dev;
495 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
499 ifm = nlmsg_data(nlh);
500 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
505 dev = __dev_get_by_index(&init_net, ifm->ifa_index);
511 in_dev = __in_dev_get_rtnl(dev);
512 if (in_dev == NULL) {
517 ifa = inet_alloc_ifa();
520 * A potential indev allocation can be left alive, it stays
521 * assigned to its device and is destroy with it.
527 ipv4_devconf_setall(in_dev);
530 if (tb[IFA_ADDRESS] == NULL)
531 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
533 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
534 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
535 ifa->ifa_flags = ifm->ifa_flags;
536 ifa->ifa_scope = ifm->ifa_scope;
537 ifa->ifa_dev = in_dev;
539 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
540 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
542 if (tb[IFA_BROADCAST])
543 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
546 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
549 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
551 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink handler for RTM_NEWADDR (registered in devinet_init()).
 * Only init_net is handled.  The request is converted with
 * rtm_to_ifaddr() and the result inserted with __inet_insert_ifa(),
 * passing along the request header and the sender's netlink pid.
 */
559 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
561 struct net *net = skb->sk->sk_net;
562 struct in_ifaddr *ifa;
566 if (net != &init_net)
569 ifa = rtm_to_ifaddr(nlh);
573 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
577 * Determine a default network mask, based on the IP address.
/*
 * The address is converted to host byte order and tested against the
 * class A, B and C ranges; the result starts at -1 for anything else.
 * devinet_ioctl() treats a negative result as an invalid address and
 * otherwise uses the value as a prefix length.
 * NOTE(review): the value assigned for each class is not visible here.
 */
580 static __inline__ int inet_abc_len(__be32 addr)
582 int rc = -1; /* Something else, probably a multicast. */
587 __u32 haddr = ntohl(addr);
589 if (IN_CLASSA(haddr))
591 else if (IN_CLASSB(haddr))
593 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @cmd: one of the SIOCxIFxxx commands handled in the switches below
 * @arg: user pointer to a struct ifreq
 *
 * Visible flow:
 *  - copy the ifreq in, force NUL termination of the name, remember the
 *    caller's original sockaddr and locate a ':' alias separator;
 *  - "get" commands note whether the caller supplied an AF_INET address
 *    (tryaddrmatch) and then clear the sockaddr for the reply;
 *  - "set" commands require CAP_NET_ADMIN and an AF_INET sockaddr;
 *  - the device is looked up by name in init_net and the address is
 *    selected by label plus original address (4.4BSD style), falling
 *    back to label only (4.3BSD style);
 *  - without a matching address only SIOCSIFADDR and SIOCSIFFLAGS may
 *    proceed, everything else gets -EADDRNOTAVAIL;
 *  - the "get" cases copy the requested field into the reply; the "set"
 *    cases unlink the address, modify it and insert it again so that
 *    notifications are regenerated;
 *  - the reply is copied back with copy_to_user(), giving -EFAULT on
 *    failure.
 *
 * NOTE(review): locking, several case labels and most error returns sit
 * on lines that are not visible in this listing.
 */
601 int devinet_ioctl(unsigned int cmd, void __user *arg)
604 struct sockaddr_in sin_orig;
605 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
606 struct in_device *in_dev;
607 struct in_ifaddr **ifap = NULL;
608 struct in_ifaddr *ifa = NULL;
609 struct net_device *dev;
612 int tryaddrmatch = 0;
615 * Fetch the caller's info block into kernel space
618 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
620 ifr.ifr_name[IFNAMSIZ - 1] = 0;
622 /* save original address for comparison */
623 memcpy(&sin_orig, sin, sizeof(*sin));
625 colon = strchr(ifr.ifr_name, ':');
630 dev_load(&init_net, ifr.ifr_name);
634 case SIOCGIFADDR: /* Get interface address */
635 case SIOCGIFBRDADDR: /* Get the broadcast address */
636 case SIOCGIFDSTADDR: /* Get the destination address */
637 case SIOCGIFNETMASK: /* Get the netmask for the interface */
638 /* Note that these ioctls will not sleep,
639 so that we do not impose a lock.
640 One day we will be forced to put shlock here (I mean SMP)
642 tryaddrmatch = (sin_orig.sin_family == AF_INET);
643 memset(sin, 0, sizeof(*sin));
644 sin->sin_family = AF_INET;
649 if (!capable(CAP_NET_ADMIN))
652 case SIOCSIFADDR: /* Set interface address (and family) */
653 case SIOCSIFBRDADDR: /* Set the broadcast address */
654 case SIOCSIFDSTADDR: /* Set the destination address */
655 case SIOCSIFNETMASK: /* Set the netmask for the interface */
657 if (!capable(CAP_NET_ADMIN))
660 if (sin->sin_family != AF_INET)
671 if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
677 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
679 /* Matthias Andree */
680 /* compare label and address (4.4BSD style) */
681 /* note: we only do this for a limited set of ioctls
682 and only if the original address family was AF_INET.
683 This is checked above. */
684 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685 ifap = &ifa->ifa_next) {
686 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
687 sin_orig.sin_addr.s_addr ==
693 /* we didn't get a match, maybe the application is
694 4.3BSD-style and passed in junk so we fall back to
695 comparing just the label */
697 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
698 ifap = &ifa->ifa_next)
699 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
704 ret = -EADDRNOTAVAIL;
705 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
709 case SIOCGIFADDR: /* Get interface address */
710 sin->sin_addr.s_addr = ifa->ifa_local;
713 case SIOCGIFBRDADDR: /* Get the broadcast address */
714 sin->sin_addr.s_addr = ifa->ifa_broadcast;
717 case SIOCGIFDSTADDR: /* Get the destination address */
718 sin->sin_addr.s_addr = ifa->ifa_address;
721 case SIOCGIFNETMASK: /* Get the netmask for the interface */
722 sin->sin_addr.s_addr = ifa->ifa_mask;
727 ret = -EADDRNOTAVAIL;
731 if (!(ifr.ifr_flags & IFF_UP))
732 inet_del_ifa(in_dev, ifap, 1);
735 ret = dev_change_flags(dev, ifr.ifr_flags);
738 case SIOCSIFADDR: /* Set interface address (and family) */
740 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
745 if ((ifa = inet_alloc_ifa()) == NULL)
748 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
750 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
753 if (ifa->ifa_local == sin->sin_addr.s_addr)
755 inet_del_ifa(in_dev, ifap, 0);
756 ifa->ifa_broadcast = 0;
757 ifa->ifa_anycast = 0;
760 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
762 if (!(dev->flags & IFF_POINTOPOINT)) {
763 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
764 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
765 if ((dev->flags & IFF_BROADCAST) &&
766 ifa->ifa_prefixlen < 31)
767 ifa->ifa_broadcast = ifa->ifa_address |
770 ifa->ifa_prefixlen = 32;
771 ifa->ifa_mask = inet_make_mask(32);
773 ret = inet_set_ifa(dev, ifa);
776 case SIOCSIFBRDADDR: /* Set the broadcast address */
778 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
779 inet_del_ifa(in_dev, ifap, 0);
780 ifa->ifa_broadcast = sin->sin_addr.s_addr;
781 inet_insert_ifa(ifa);
785 case SIOCSIFDSTADDR: /* Set the destination address */
787 if (ifa->ifa_address == sin->sin_addr.s_addr)
790 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
793 inet_del_ifa(in_dev, ifap, 0);
794 ifa->ifa_address = sin->sin_addr.s_addr;
795 inet_insert_ifa(ifa);
798 case SIOCSIFNETMASK: /* Set the netmask for the interface */
801 * The mask we set must be legal.
804 if (bad_mask(sin->sin_addr.s_addr, 0))
807 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
808 __be32 old_mask = ifa->ifa_mask;
809 inet_del_ifa(in_dev, ifap, 0);
810 ifa->ifa_mask = sin->sin_addr.s_addr;
811 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
813 /* See if current broadcast address matches
814 * with current netmask, then recalculate
815 * the broadcast address. Otherwise it's a
816 * funny address, so don't touch it since
817 * the user seems to know what (s)he's doing...
819 if ((dev->flags & IFF_BROADCAST) &&
820 (ifa->ifa_prefixlen < 31) &&
821 (ifa->ifa_broadcast ==
822 (ifa->ifa_local|~old_mask))) {
823 ifa->ifa_broadcast = (ifa->ifa_local |
824 ~sin->sin_addr.s_addr);
826 inet_insert_ifa(ifa);
836 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Interface-configuration callback for PF_INET (registered through
 * register_gifconf() in devinet_init()).
 *
 * For each address of 'dev' an ifreq is built, with the name taken from
 * the address label or the device name and the sockaddr set to AF_INET,
 * and copied to the user buffer.  'buf' and 'len' advance by one ifreq
 * per entry while 'done' accumulates the bytes produced.  A device
 * without IPv4 state or without addresses takes the early-exit branch.
 * NOTE(review): the handling of a NULL 'buf', of a too-small 'len' and
 * of a failed copy is on lines not visible in this listing.
 */
840 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
842 struct in_device *in_dev = __in_dev_get_rtnl(dev);
843 struct in_ifaddr *ifa;
847 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850 for (; ifa; ifa = ifa->ifa_next) {
855 if (len < (int) sizeof(ifr))
857 memset(&ifr, 0, sizeof(struct ifreq));
859 strcpy(ifr.ifr_name, ifa->ifa_label);
861 strcpy(ifr.ifr_name, dev->name);
863 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
864 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
871 buf += sizeof(struct ifreq);
872 len -= sizeof(struct ifreq);
873 done += sizeof(struct ifreq);
/*
 * Choose a local source address on 'dev' for destination 'dst'.
 *
 * @scope: primaries whose scope value is greater than this are tested
 *         against it on the first pass (the branch body is not shown)
 *
 * First pass: walk the primary addresses of 'dev'; an address whose
 * subnet contains 'dst' (or any address when 'dst' is zero) is taken,
 * and ifa_local is also recorded as a candidate on a second visible
 * assignment.  Second pass: walk every device in init_net under
 * dev_base_lock and take the first primary address that is not
 * link-scoped and whose scope value does not exceed 'scope'.
 * NOTE(review): the conditions linking the two passes and the RCU
 * read-side locking are on lines not visible in this listing.
 */
879 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882 struct in_device *in_dev;
885 in_dev = __in_dev_get_rcu(dev);
889 for_primary_ifa(in_dev) {
890 if (ifa->ifa_scope > scope)
892 if (!dst || inet_ifa_match(dst, ifa)) {
893 addr = ifa->ifa_local;
897 addr = ifa->ifa_local;
898 } endfor_ifa(in_dev);
905 /* Not loopback addresses on loopback should be preferred
906 in this case. It is importnat that lo is the first interface
909 read_lock(&dev_base_lock);
911 for_each_netdev(&init_net, dev) {
912 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915 for_primary_ifa(in_dev) {
916 if (ifa->ifa_scope != RT_SCOPE_LINK &&
917 ifa->ifa_scope <= scope) {
918 addr = ifa->ifa_local;
919 goto out_unlock_both;
921 } endfor_ifa(in_dev);
924 read_unlock(&dev_base_lock);
/*
 * Per-device helper for inet_confirm_addr().
 *
 * Walks the addresses of in_dev tracking a candidate 'addr' and a flag
 * 'same' that records whether an address satisfied both the 'local' and
 * the 'dst' subnet constraints (a zero value means "any").  A candidate
 * must have a scope value not exceeding 'scope'.  Returns the candidate
 * only if 'same' ended up set, otherwise 0.
 * NOTE(review): roughly half of the loop body is not visible in this
 * listing; treat the summary above as partial.
 */
930 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
931 __be32 local, int scope)
938 (local == ifa->ifa_local || !local) &&
939 ifa->ifa_scope <= scope) {
940 addr = ifa->ifa_local;
945 same = (!local || inet_ifa_match(local, ifa)) &&
946 (!dst || inet_ifa_match(dst, ifa));
950 /* Is the selected addr into dst subnet? */
951 if (inet_ifa_match(addr, ifa))
953 /* No, then can we use new local src? */
954 if (ifa->ifa_scope <= scope) {
955 addr = ifa->ifa_local;
958 /* search for large dst subnet for addr */
962 } endfor_ifa(in_dev);
964 return same? addr : 0;
968 * Confirm that local IP address exists using wildcards:
969 * - dev: only on this interface, 0=any interface
970 * - dst: only in the same subnet as dst, 0=any dst
971 * - local: address, 0=autoselect the local address
972 * - scope: maximum allowed scope value for the local address
/*
 * With a device given, the check is delegated to confirm_addr_indev()
 * for that device's in_device.  Otherwise every device in init_net is
 * tried in turn under the dev_base_lock read lock.
 * NOTE(review): the loop-exit condition, the RCU read-side locking and
 * the return statement are on lines not visible in this listing.
 */
974 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
977 struct in_device *in_dev;
981 if ((in_dev = __in_dev_get_rcu(dev)))
982 addr = confirm_addr_indev(in_dev, dst, local, scope);
988 read_lock(&dev_base_lock);
990 for_each_netdev(&init_net, dev) {
991 if ((in_dev = __in_dev_get_rcu(dev))) {
992 addr = confirm_addr_indev(in_dev, dst, local, scope);
998 read_unlock(&dev_base_lock);
/*
 * Add 'nb' to the IPv4 address notifier chain; the chain is invoked with
 * NETDEV_UP / NETDEV_DOWN when addresses are inserted or deleted.
 * Returns the result of blocking_notifier_chain_register().
 */
1007 int register_inetaddr_notifier(struct notifier_block *nb)
1009 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove 'nb' from the IPv4 address notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1012 int unregister_inetaddr_notifier(struct notifier_block *nb)
1014 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1017 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1018 * alias numbering and to create unique labels if possible.
/*
 * For every address the old label is saved and the label is overwritten
 * with the new device name.  An alias suffix (the part of the old label
 * from ':' onwards, or a generated ":%d") is then appended when it fits
 * within IFNAMSIZ; otherwise it is written over the tail of the label so
 * that the result still fits.
 * NOTE(review): the conditions choosing between the existing suffix and
 * the generated one, and the update of 'named', are not visible here.
 */
1020 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1022 struct in_ifaddr *ifa;
1025 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1026 char old[IFNAMSIZ], *dot;
1028 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1029 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1032 dot = strchr(old, ':');
1034 sprintf(old, ":%d", named);
1037 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1038 strcat(ifa->ifa_label, dot);
1040 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1045 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (installed via ip_netdev_notifier).
 *
 * Devices outside init_net are filtered out first.  On NETDEV_REGISTER
 * the IPv4 state is created with inetdev_init(); allocation failure is
 * reported as -ENOMEM, and loopback devices get NOXFRM and NOPOLICY
 * set.  In the later switch, a loopback device gets 127.0.0.1/8 with
 * host scope configured, NETDEV_UNREGISTER destroys the IPv4 state, and
 * NETDEV_CHANGENAME relabels the addresses and re-registers the sysctl
 * entries under the new name.
 * NOTE(review): several case labels, the MTU test and the return values
 * are on lines not visible in this listing.
 */
1047 static int inetdev_event(struct notifier_block *this, unsigned long event,
1050 struct net_device *dev = ptr;
1051 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1053 if (dev->nd_net != &init_net)
1059 if (event == NETDEV_REGISTER) {
1060 in_dev = inetdev_init(dev);
1062 return notifier_from_errno(-ENOMEM);
1063 if (dev->flags & IFF_LOOPBACK) {
1064 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1065 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1072 case NETDEV_REGISTER:
1073 printk(KERN_DEBUG "inetdev_event: bug\n");
1079 if (dev->flags & IFF_LOOPBACK) {
1080 struct in_ifaddr *ifa;
1081 if ((ifa = inet_alloc_ifa()) != NULL) {
1083 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084 ifa->ifa_prefixlen = 8;
1085 ifa->ifa_mask = inet_make_mask(8);
1086 in_dev_hold(in_dev);
1087 ifa->ifa_dev = in_dev;
1088 ifa->ifa_scope = RT_SCOPE_HOST;
1089 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090 inet_insert_ifa(ifa);
1098 case NETDEV_CHANGEMTU:
1101 /* MTU fell below 68, disable IP */
1102 case NETDEV_UNREGISTER:
1103 inetdev_destroy(in_dev);
1105 case NETDEV_CHANGENAME:
1106 /* Do not notify about label change, this event is
1107 * not interesting to applications using netlink.
1109 inetdev_changename(dev, in_dev);
1111 devinet_sysctl_unregister(in_dev);
1112 devinet_sysctl_register(in_dev);
/* Notifier block that routes netdevice events to inetdev_event(); registered in devinet_init(). */
1119 static struct notifier_block ip_netdev_notifier = {
1120 .notifier_call =inetdev_event,
/*
 * Upper bound on the size of one address notification: the aligned
 * ifaddrmsg header plus four 4-byte address attributes and a label of
 * IFNAMSIZ bytes.  rtmsg_ifa() sizes its skb with this, so it must stay
 * in step with the attributes emitted by inet_fill_ifaddr().
 */
1123 static inline size_t inet_nlmsg_size(void)
1125 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1126 + nla_total_size(4) /* IFA_ADDRESS */
1127 + nla_total_size(4) /* IFA_LOCAL */
1128 + nla_total_size(4) /* IFA_BROADCAST */
1129 + nla_total_size(4) /* IFA_ANYCAST */
1130 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise one address into 'skb' as a netlink message of type 'event'.
 *
 * The ifaddrmsg header carries AF_INET, the prefix length, the flags
 * (always with IFA_F_PERMANENT added), the scope and the interface
 * index.  IFA_ADDRESS, IFA_BROADCAST, IFA_ANYCAST and IFA_LABEL are
 * emitted only when the corresponding field is non-zero / non-empty.
 * Returns the result of nlmsg_end() on success; the failure path cancels
 * the partially built message.
 * NOTE(review): the guard in front of IFA_LOCAL and the error value
 * returned on failure are on lines not visible in this listing.
 */
1133 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1134 u32 pid, u32 seq, int event, unsigned int flags)
1136 struct ifaddrmsg *ifm;
1137 struct nlmsghdr *nlh;
1139 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1143 ifm = nlmsg_data(nlh);
1144 ifm->ifa_family = AF_INET;
1145 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1146 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1147 ifm->ifa_scope = ifa->ifa_scope;
1148 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1150 if (ifa->ifa_address)
1151 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1154 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1156 if (ifa->ifa_broadcast)
1157 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1159 if (ifa->ifa_anycast)
1160 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1162 if (ifa->ifa_label[0])
1163 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1165 return nlmsg_end(skb, nlh);
1168 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink dump handler for RTM_GETADDR (registered in devinet_init()).
 *
 * Only init_net is handled.  cb->args[0] and cb->args[1] hold the device
 * and address positions at which a previous partial dump stopped.  Every
 * address from that point on is emitted as an RTM_NEWADDR message
 * flagged NLM_F_MULTI; a non-positive result from inet_fill_ifaddr()
 * ends the pass, and the address position is saved for the next call.
 * NOTE(review): the device-index bookkeeping and the return value are on
 * lines not visible in this listing.
 */
1172 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1174 struct net *net = skb->sk->sk_net;
1176 struct net_device *dev;
1177 struct in_device *in_dev;
1178 struct in_ifaddr *ifa;
1179 int s_ip_idx, s_idx = cb->args[0];
1181 if (net != &init_net)
1184 s_ip_idx = ip_idx = cb->args[1];
1186 for_each_netdev(&init_net, dev) {
1191 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1194 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 ifa = ifa->ifa_next, ip_idx++) {
1196 if (ip_idx < s_ip_idx)
1198 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1200 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1209 cb->args[1] = ip_idx;
/*
 * Broadcast an address event to the RTNLGRP_IPV4_IFADDR netlink group.
 *
 * The sequence number is taken from the triggering request when there is
 * one, else 0.  The skb is sized with inet_nlmsg_size(), so an
 * -EMSGSIZE from inet_fill_ifaddr() indicates that the size computation
 * is wrong and triggers a warning.  On the error path the error is
 * recorded on the group's listeners with rtnl_set_sk_err().
 * NOTE(review): the branches leading to the error path are on lines not
 * visible in this listing.
 */
1214 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217 struct sk_buff *skb;
1218 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1221 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1225 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1227 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228 WARN_ON(err == -EMSGSIZE);
1232 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1235 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1238 #ifdef CONFIG_SYSCTL
/*
 * Propagate default-configuration entry 'i' to every device in 'net'
 * whose own entry has not been explicitly set (its bit in cnf.state is
 * clear).  The device list is walked under the dev_base_lock read lock.
 */
1240 static void devinet_copy_dflt_conf(struct net *net, int i)
1242 struct net_device *dev;
1244 read_lock(&dev_base_lock);
1245 for_each_netdev(net, dev) {
1246 struct in_device *in_dev;
1248 in_dev = __in_dev_get_rcu(dev);
1249 if (in_dev && !test_bit(i, in_dev->cnf.state))
1250 in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1253 read_unlock(&dev_base_lock);
/*
 * Apply a change of the global forwarding switch.
 * The "all" accept_redirects value becomes the inverse of forwarding,
 * the default forwarding value follows it, and the FORWARDING entry is
 * set on the devices of 'net', which are walked under the dev_base_lock
 * read lock.
 * NOTE(review): the guard around the per-device update is on a line not
 * visible in this listing.
 */
1256 static void inet_forward_change(struct net *net)
1258 struct net_device *dev;
1259 int on = IPV4_DEVCONF_ALL(FORWARDING);
1261 IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1262 IPV4_DEVCONF_DFLT(FORWARDING) = on;
1264 read_lock(&dev_base_lock);
1265 for_each_netdev(net, dev) {
1266 struct in_device *in_dev;
1268 in_dev = __in_dev_get_rcu(dev);
1270 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1273 read_unlock(&dev_base_lock);
/*
 * /proc handler for ordinary per-device integer settings.
 * After proc_dointvec() has processed the request, the entry's index is
 * derived from the offset of ctl->data within cnf->data and its bit in
 * cnf->state is set, marking the value as explicitly configured.  When
 * the table belongs to the default configuration the value is also
 * propagated with devinet_copy_dflt_conf().
 * NOTE(review): the condition restricting this to writes is on a line
 * not visible in this listing.
 */
1278 static int devinet_conf_proc(ctl_table *ctl, int write,
1279 struct file* filp, void __user *buffer,
1280 size_t *lenp, loff_t *ppos)
1282 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1285 struct ipv4_devconf *cnf = ctl->extra1;
1286 struct net *net = ctl->extra2;
1287 int i = (int *)ctl->data - cnf->data;
1289 set_bit(i, cnf->state);
1291 if (cnf == &ipv4_devconf_dflt)
1292 devinet_copy_dflt_conf(net, i);
/*
 * sysctl(2) strategy routine for per-device integer settings.
 *
 * A new value must be present and exactly sizeof(int) long; it is
 * fetched with get_user().  If the caller asked for the old value, at
 * most table->maxlen bytes are copied out and the length written back.
 * The entry's state bit is then set, and a change to the default
 * configuration is propagated with devinet_copy_dflt_conf().
 * NOTE(review): the return codes, the comparison with the current value
 * and the store of the new value are on lines not visible here.
 */
1298 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1299 void __user *oldval, size_t __user *oldlenp,
1300 void __user *newval, size_t newlen)
1302 struct ipv4_devconf *cnf;
1304 int *valp = table->data;
1308 if (!newval || !newlen)
1311 if (newlen != sizeof(int))
1314 if (get_user(new, (int __user *)newval))
1320 if (oldval && oldlenp) {
1323 if (get_user(len, oldlenp))
1327 if (len > table->maxlen)
1328 len = table->maxlen;
1329 if (copy_to_user(oldval, valp, len))
1331 if (put_user(len, oldlenp))
1338 cnf = table->extra1;
1339 net = table->extra2;
1340 i = (int *)table->data - cnf->data;
1342 set_bit(i, cnf->state);
1344 if (cnf == &ipv4_devconf_dflt)
1345 devinet_copy_dflt_conf(net, i);
/*
 * /proc handler for the "forwarding" entries.
 * After proc_dointvec(), a write that actually changed the value is
 * dispatched by which variable was written: the global "all" value goes
 * through inet_forward_change(), while any value other than the default
 * one takes a further branch.
 * NOTE(review): the body of that last branch and the return statement
 * are on lines not visible in this listing.
 */
1350 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1351 struct file* filp, void __user *buffer,
1352 size_t *lenp, loff_t *ppos)
1354 int *valp = ctl->data;
1356 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1358 if (write && *valp != val) {
1359 struct net *net = ctl->extra2;
1361 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1362 inet_forward_change(net);
1363 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
/*
 * /proc handler: run proc_dointvec() and act when a write changed the
 * value.
 * NOTE(review): the action itself is on a line not visible in this
 * listing; the name suggests a routing-cache flush -- confirm.
 */
1370 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1371 struct file* filp, void __user *buffer,
1372 size_t *lenp, loff_t *ppos)
1374 int *valp = ctl->data;
1376 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1378 if (write && *valp != val)
/*
 * sysctl(2) strategy counterpart of ipv4_doint_and_flush(): delegates
 * the value handling to devinet_conf_sysctl().
 * NOTE(review): what happens with 'ret' afterwards is on lines not
 * visible in this listing.
 */
1384 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1385 void __user *oldval, size_t __user *oldlenp,
1386 void __user *newval, size_t newlen)
1388 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
/*
 * Helpers for building the entries of the devinet_sysctl template.
 * DEVINET_SYSCTL_ENTRY is the general form; its .data and .extra1 point
 * at ipv4_devconf and are re-targeted per instance by
 * __devinet_sysctl_register().  The RW/RO variants use the generic
 * handlers with mode 0644/0444, COMPLEX takes custom handlers, and
 * FLUSHING uses the ipv4_doint_and_flush pair.
 */
1398 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1400 .ctl_name = NET_IPV4_CONF_ ## attr, \
1402 .data = ipv4_devconf.data + \
1403 NET_IPV4_CONF_ ## attr - 1, \
1404 .maxlen = sizeof(int), \
1406 .proc_handler = proc, \
1407 .strategy = sysctl, \
1408 .extra1 = &ipv4_devconf, \
1411 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1412 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1413 devinet_conf_sysctl)
1415 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1416 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1417 devinet_conf_sysctl)
1419 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1420 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1422 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1423 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1424 ipv4_doint_and_flush_strategy)
/*
 * Template sysctl table for one configuration instance ("all",
 * "default" or a device).  __devinet_sysctl_register() duplicates it and
 * points the copy at the instance's own ipv4_devconf.  "forwarding" has
 * a dedicated proc handler, "mc_forwarding" is read-only, and the last
 * four entries use the flushing handlers.
 */
1426 static struct devinet_sysctl_table {
1427 struct ctl_table_header *sysctl_header;
1428 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1430 } devinet_sysctl = {
1432 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1433 devinet_sysctl_forward,
1434 devinet_conf_sysctl),
1435 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1437 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1438 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1439 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1440 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1441 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1442 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1443 "accept_source_route"),
1444 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1445 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1446 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1447 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1448 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1449 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1450 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1451 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1452 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1454 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1455 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1456 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1457 "force_igmp_version"),
1458 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1459 "promote_secondaries"),
/*
 * Register the net/ipv4/conf/<dev_name> sysctl directory for the
 * configuration block 'p'.
 *
 * The devinet_sysctl template is duplicated; each entry's .data pointer
 * is shifted by the distance between 'p' and ipv4_devconf so that it
 * addresses the same field inside 'p', and extra1/extra2 are set to 'p'
 * and 'net'.  The directory name is a private copy of 'dev_name' because
 * sysctl keeps the pointer and the device name can change.  The loop
 * stops one short of the array size, leaving the final slot untouched.
 * NOTE(review): the failure paths and the store of the table pointer
 * into 'p' are on lines not visible in this listing.
 */
1463 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1464 int ctl_name, struct ipv4_devconf *p)
1467 struct devinet_sysctl_table *t;
1469 #define DEVINET_CTL_PATH_DEV 3
1471 struct ctl_path devinet_ctl_path[] = {
1472 { .procname = "net", .ctl_name = CTL_NET, },
1473 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1474 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1475 { /* to be set */ },
1479 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1483 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1484 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1485 t->devinet_vars[i].extra1 = p;
1486 t->devinet_vars[i].extra2 = net;
1490 * Make a copy of dev_name, because '.procname' is regarded as const
1491 * by sysctl and we wouldn't want anyone to change it under our feet
1492 * (see SIOCSIFNAME).
1494 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1498 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1499 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1501 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1503 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for 'cnf': fetch the table recorded
 * in cnf->sysctl and unregister its header.
 * NOTE(review): the NULL check and the freeing of the table are on lines
 * not visible in this listing.
 */
1517 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1519 struct devinet_sysctl_table *t = cnf->sysctl;
1525 unregister_sysctl_table(t->sysctl_header);
/*
 * Register both sysctl trees of a device: the neighbour (ARP) parameters
 * and the per-device IPv4 configuration, the latter named after the
 * device and numbered by its ifindex.
 */
1530 static void devinet_sysctl_register(struct in_device *idev)
1532 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1533 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1534 __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1535 idev->dev->ifindex, &idev->cnf);
/* Remove both sysctl trees of a device, in the reverse order of registration. */
1538 static void devinet_sysctl_unregister(struct in_device *idev)
1540 __devinet_sysctl_unregister(&idev->cnf);
1541 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Sysctl entry "ip_forward", registered under net/ipv4 by
 * devinet_init_net().  It is backed by the FORWARDING slot of
 * ipv4_devconf, i.e. the same variable as conf/all/forwarding, and uses
 * the same handlers.  This instance is bound to init_net;
 * devinet_init_net() duplicates and re-targets it for other namespaces.
 */
1545 static struct ctl_table ctl_forward_entry[] = {
1547 .ctl_name = NET_IPV4_FORWARD,
1548 .procname = "ip_forward",
1549 .data = &ipv4_devconf.data[
1550 NET_IPV4_CONF_FORWARDING - 1],
1551 .maxlen = sizeof(int),
1553 .proc_handler = devinet_sysctl_forward,
1554 .strategy = devinet_conf_sysctl,
1555 .extra1 = &ipv4_devconf,
1556 .extra2 = &init_net,
/* Sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
1561 static __net_initdata struct ctl_path net_ipv4_path[] = {
1562 { .procname = "net", .ctl_name = CTL_NET, },
1563 { .procname = "ipv4", .ctl_name = NET_IPV4, },
/*
 * Per-namespace initialisation.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry directly.  Any other namespace gets private
 * kmemdup() copies, with the forwarding entry re-pointed at the copied
 * "all" block.  With CONFIG_SYSCTL the "all" and "default" directories
 * and the ip_forward entry are registered.  On success the header and
 * both configuration blocks are stored in net->ipv4.  The tail of the
 * function is the unwind path: it unregisters what was registered and
 * tests each object against its static original, so that only private
 * copies are affected.
 * NOTE(review): the goto targets, the kfree() calls and the return
 * values are on lines not visible in this listing.
 */
1567 static __net_init int devinet_init_net(struct net *net)
1570 struct ctl_table *tbl;
1571 struct ipv4_devconf *all, *dflt;
1572 struct ctl_table_header *forw_hdr;
1575 all = &ipv4_devconf;
1576 dflt = &ipv4_devconf_dflt;
1577 tbl = ctl_forward_entry;
1579 if (net != &init_net) {
1580 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1584 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1586 goto err_alloc_dflt;
1588 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1592 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1593 tbl[0].extra1 = all;
1594 tbl[0].extra2 = net;
1597 #ifdef CONFIG_SYSCTL
1598 err = __devinet_sysctl_register(net, "all",
1599 NET_PROTO_CONF_ALL, all);
1603 err = __devinet_sysctl_register(net, "default",
1604 NET_PROTO_CONF_DEFAULT, dflt);
1609 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1610 if (forw_hdr == NULL)
1614 net->ipv4.forw_hdr = forw_hdr;
1615 net->ipv4.devconf_all = all;
1616 net->ipv4.devconf_dflt = dflt;
1619 #ifdef CONFIG_SYSCTL
1621 __devinet_sysctl_unregister(dflt);
1623 __devinet_sysctl_unregister(all);
1625 if (tbl != ctl_forward_entry)
1629 if (dflt != &ipv4_devconf_dflt)
1632 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown: remember the forwarding table behind the
 * registered header, unregister the ip_forward entry and the "default"
 * and "all" directories (CONFIG_SYSCTL only), then free the two
 * configuration blocks.
 * NOTE(review): the line releasing 'tbl' is not visible in this
 * listing.
 */
1638 static __net_exit void devinet_exit_net(struct net *net)
1640 struct ctl_table *tbl;
1642 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643 #ifdef CONFIG_SYSCTL
1644 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1645 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1646 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1649 kfree(net->ipv4.devconf_dflt);
1650 kfree(net->ipv4.devconf_all);
/* Per-namespace init/exit hooks, registered by devinet_init(). */
1653 static __net_initdata struct pernet_operations devinet_ops = {
1654 .init = devinet_init_net,
1655 .exit = devinet_exit_net,
/*
 * Boot-time initialisation: register the per-namespace operations, the
 * PF_INET interface-configuration callback, the netdevice notifier and
 * the rtnetlink handlers (RTM_NEWADDR and RTM_DELADDR as "doit"
 * handlers, RTM_GETADDR as a dump handler).
 */
1658 void __init devinet_init(void)
1660 register_pernet_subsys(&devinet_ops);
1662 register_gifconf(PF_INET, inet_gifconf);
1663 register_netdevice_notifier(&ip_netdev_notifier);
1665 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1666 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1667 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
/* Entry points of this file that are exported to loadable modules. */
1670 EXPORT_SYMBOL(in_dev_finish_destroy);
1671 EXPORT_SYMBOL(inet_select_addr);
1672 EXPORT_SYMBOL(inetdev_by_index);
1673 EXPORT_SYMBOL(register_inetaddr_notifier);
1674 EXPORT_SYMBOL(unregister_inetaddr_notifier);