X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fsysctl.c;h=b2a2d6889babc898794e7c7fb7907ef59a3b487b;hb=320787c75ccac3189a1b7aae81f0efc1055f6d3a;hp=96efbb859997e5b98be42f10c2bda73a920e7882;hpb=54f9f80d6543fb7b157d3b11e2e7911dc1379790;p=linux-2.6 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 96efbb8599..b2a2d6889b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -53,8 +52,11 @@ #ifdef CONFIG_X86 #include #include +#include #endif +static int deprecated_sysctl_warning(struct __sysctl_args *args); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -63,21 +65,37 @@ extern int print_fatal_signals; extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int sysctl_panic_on_oom; +extern int sysctl_oom_kill_allocating_task; +extern int sysctl_oom_dump_tasks; extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; extern int pid_max; extern int min_free_kbytes; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; -extern int audit_argv_kb; +extern int latencytop_enabled; + +/* Constants used for minimum and maximum */ +#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) +static int one = 1; +#endif + +#ifdef CONFIG_DETECT_SOFTLOCKUP +static int sixty = 60; +#endif + +#ifdef CONFIG_MMU +static int two = 2; +#endif + +static int zero; +static int one_hundred = 100; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -128,32 +146,37 @@ extern int max_lock_depth; #ifdef CONFIG_SYSCTL_SYSCALL static int parse_table(int __user *, int, void __user *, size_t __user *, - void __user *, size_t, ctl_table *); + void __user *, size_t, struct ctl_table *); #endif #ifdef CONFIG_PROC_SYSCTL -static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, +static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); #endif -static ctl_table root_table[]; -static struct ctl_table_header root_table_header = - { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; - -static ctl_table kern_table[]; -static ctl_table vm_table[]; -static ctl_table fs_table[]; -static ctl_table debug_table[]; -static ctl_table dev_table[]; -extern ctl_table random_table[]; -#ifdef CONFIG_UNIX98_PTYS -extern ctl_table pty_table[]; -#endif +static struct ctl_table root_table[]; +static struct ctl_table_root sysctl_table_root; +static struct ctl_table_header root_table_header = { + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list), + .root = &sysctl_table_root, +}; +static struct ctl_table_root sysctl_table_root = { + .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), + .header_list = LIST_HEAD_INIT(root_table_header.ctl_entry), +}; + +static struct ctl_table kern_table[]; +static struct ctl_table vm_table[]; +static struct ctl_table fs_table[]; +static struct ctl_table debug_table[]; +static struct ctl_table dev_table[]; +extern struct ctl_table random_table[]; #ifdef CONFIG_INOTIFY_USER -extern ctl_table inotify_table[]; +extern struct ctl_table inotify_table[]; #endif #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT @@ -165,7 +188,7 @@ extern int lock_stat; /* The default sysctl tables: */ -static ctl_table root_table[] = { +static struct ctl_table root_table[] = { { .ctl_name = CTL_KERN, .procname = "kernel", @@ -178,14 +201,6 @@ static ctl_table root_table[] = { .mode = 0555, .child = vm_table, }, -#ifdef CONFIG_NET - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = net_table, - }, -#endif { .ctl_name = CTL_FS, .procname = "fs", @@ -212,21 +227,24 @@ static ctl_table root_table[] = { }; #ifdef CONFIG_SCHED_DEBUG -static unsigned long min_sched_granularity_ns = 100000; /* 100 usecs */ -static unsigned long max_sched_granularity_ns = 1000000000; /* 1 second */ -static unsigned long min_wakeup_granularity_ns; /* 0 usecs */ -static unsigned long max_wakeup_granularity_ns = 1000000000; /* 1 second */ +static int min_sched_granularity_ns = 100000; /* 100 usecs */ +static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ +static int min_wakeup_granularity_ns; /* 0 usecs */ +static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ #endif -static ctl_table kern_table[] = { +static struct ctl_table kern_table[] = { #ifdef CONFIG_SCHED_DEBUG { .ctl_name = CTL_UNNUMBERED, - .procname = "sched_nr_latency", - .data = &sysctl_sched_nr_latency, + .procname = "sched_min_granularity_ns", + .data = &sysctl_sched_min_granularity, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &sched_nr_latency_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_sched_granularity_ns, + .extra2 = &max_sched_granularity_ns, }, { .ctl_name = CTL_UNNUMBERED, @@ -234,7 +252,7 @@ static ctl_table kern_table[] = { .data = &sysctl_sched_latency, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &sched_nr_latency_handler, .strategy = &sysctl_intvec, .extra1 = &min_sched_granularity_ns, .extra2 = &max_sched_granularity_ns, @@ -285,7 +303,31 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_nr_migrate", + .data = &sysctl_sched_nr_migrate, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_period_us", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_runtime_us", + .data = &sysctl_sched_rt_runtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "sched_compat_yield", @@ -330,16 +372,6 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#ifdef CONFIG_AUDITSYSCALL - { - .ctl_name = CTL_UNNUMBERED, - .procname = "audit_argv_kb", - .data = &audit_argv_kb, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", @@ -351,7 +383,6 @@ static ctl_table kern_table[] = { }, #ifdef CONFIG_PROC_SYSCTL { - .ctl_name = KERN_TAINTED, .procname = "tainted", .data = &tainted, .maxlen = sizeof(int), @@ -359,14 +390,15 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec_taint, }, #endif +#ifdef CONFIG_LATENCYTOP { - .ctl_name = KERN_CAP_BSET, - .procname = "cap-bound", - .data = &cap_bset, - .maxlen = sizeof(kernel_cap_t), - .mode = 0600, - .proc_handler = &proc_dointvec_bset, + .procname = "latencytop", + .data = &latencytop_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, }, +#endif #ifdef CONFIG_BLK_DEV_INITRD { .ctl_name = KERN_REALROOTDEV, @@ -438,14 +470,6 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = KERN_PRINTK, - .procname = "printk", - .data = &console_loglevel, - .maxlen = 4*sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #ifdef CONFIG_KMOD { .ctl_name = KERN_MODPROBE, @@ -500,7 +524,6 @@ static ctl_table kern_table[] = { #endif #ifdef CONFIG_PROC_SYSCTL { - .ctl_name = KERN_CADPID, .procname = "cad_pid", .data = NULL, .maxlen = sizeof (int), @@ -522,14 +545,6 @@ static ctl_table kern_table[] = { .mode = 0555, .child = random_table, }, -#ifdef CONFIG_UNIX98_PTYS - { - .ctl_name = KERN_PTY, - .procname = "pty", - .mode = 0555, - .child = pty_table, - }, -#endif { .ctl_name = KERN_OVERFLOWUID, .procname = "overflowuid", @@ -601,6 +616,15 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#if defined CONFIG_PRINTK + { + .ctl_name = KERN_PRINTK, + .procname = "printk", + .data = &console_loglevel, + .maxlen = 4*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", @@ -618,6 +642,7 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif { .ctl_name = KERN_NGROUPS_MAX, .procname = "ngroups_max", @@ -636,7 +661,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = KERN_NMI_WATCHDOG, .procname = "nmi_watchdog", .data = &nmi_watchdog_enabled, .maxlen = sizeof (int), @@ -669,6 +693,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { @@ -692,7 +724,6 @@ static ctl_table kern_table[] = { #endif #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) { - .ctl_name = KERN_ACPI_VIDEO_FLAGS, .procname = "acpi_video_flags", .data = &acpi_realmode_flags, .maxlen = sizeof (unsigned long), @@ -710,6 +741,46 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_DETECT_SOFTLOCKUP + { + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_thresh", + .data = &softlockup_thresh, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &sixty, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_check_count", + .data = &sysctl_hung_task_check_count, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_timeout_secs", + .data = &sysctl_hung_task_timeout_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_warnings", + .data = &sysctl_hung_task_warnings, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + }, +#endif #ifdef CONFIG_COMPAT { .ctl_name = KERN_COMPAT_LOG, @@ -756,14 +827,7 @@ static ctl_table kern_table[] = { { .ctl_name = 0 } }; -/* Constants for minimum and maximum testing in vm_table. - We use these as one-element integer vectors. */ -static int zero; -static int two = 2; -static int one_hundred = 100; - - -static ctl_table vm_table[] = { +static struct ctl_table vm_table[] = { { .ctl_name = VM_OVERCOMMIT_MEMORY, .procname = "overcommit_memory", @@ -780,6 +844,22 @@ static ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "oom_kill_allocating_task", + .data = &sysctl_oom_kill_allocating_task, + .maxlen = sizeof(sysctl_oom_kill_allocating_task), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "oom_dump_tasks", + .data = &sysctl_oom_dump_tasks, + .maxlen = sizeof(sysctl_oom_dump_tasks), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = VM_OVERCOMMIT_RATIO, .procname = "overcommit_ratio", @@ -813,13 +893,12 @@ static ctl_table vm_table[] = { .data = &vm_dirty_ratio, .maxlen = sizeof(vm_dirty_ratio), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &dirty_ratio_handler, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, { - .ctl_name = VM_DIRTY_WB_CS, .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, .maxlen = sizeof(dirty_writeback_interval), @@ -827,7 +906,6 @@ static ctl_table vm_table[] = { .proc_handler = &dirty_writeback_centisecs_handler, }, { - .ctl_name = VM_DIRTY_EXPIRE_CS, .procname = "dirty_expire_centisecs", .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), @@ -855,7 +933,6 @@ static ctl_table vm_table[] = { }, #ifdef CONFIG_HUGETLB_PAGE { - .ctl_name = VM_HUGETLB_PAGES, .procname = "nr_hugepages", .data = &max_huge_pages, .maxlen = sizeof(unsigned long), @@ -882,11 +959,11 @@ static ctl_table vm_table[] = { }, { .ctl_name = CTL_UNNUMBERED, - .procname = "hugetlb_dynamic_pool", - .data = &hugetlb_dynamic_pool, - .maxlen = sizeof(hugetlb_dynamic_pool), + .procname = "nr_overcommit_hugepages", + .data = &sysctl_overcommit_huge_pages, + .maxlen = sizeof(sysctl_overcommit_huge_pages), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &hugetlb_overcommit_handler, }, #endif { @@ -1057,6 +1134,19 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif +#ifdef CONFIG_HIGHMEM + { + .ctl_name = CTL_UNNUMBERED, + .procname = "highmem_is_dirtyable", + .data = &vm_highmem_is_dirtyable, + .maxlen = sizeof(vm_highmem_is_dirtyable), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt @@ -1065,12 +1155,12 @@ static ctl_table vm_table[] = { }; #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static ctl_table binfmt_misc_table[] = { +static struct ctl_table binfmt_misc_table[] = { { .ctl_name = 0 } }; #endif -static ctl_table fs_table[] = { +static struct ctl_table fs_table[] = { { .ctl_name = FS_NRINODE, .procname = "inode-nr", @@ -1088,7 +1178,6 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = FS_NRFILE, .procname = "file-nr", .data = &files_stat, .maxlen = 3*sizeof(int), @@ -1103,6 +1192,14 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_open", + .data = &sysctl_nr_open, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = FS_DENTRY, .procname = "dentry-state", @@ -1164,7 +1261,6 @@ static ctl_table fs_table[] = { .extra2 = &two, }, { - .ctl_name = FS_AIO_NR, .procname = "aio-nr", .data = &aio_nr, .maxlen = sizeof(aio_nr), @@ -1172,7 +1268,6 @@ static ctl_table fs_table[] = { .proc_handler = &proc_doulongvec_minmax, }, { - .ctl_name = FS_AIO_MAX_NR, .procname = "aio-max-nr", .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), @@ -1211,7 +1306,7 @@ static ctl_table fs_table[] = { { .ctl_name = 0 } }; -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { #if defined(CONFIG_X86) || defined(CONFIG_PPC) { .ctl_name = CTL_UNNUMBERED, @@ -1225,7 +1320,7 @@ static ctl_table debug_table[] = { { .ctl_name = 0 } }; -static ctl_table dev_table[] = { +static struct ctl_table dev_table[] = { { .ctl_name = 0 } }; @@ -1279,12 +1374,27 @@ void sysctl_head_finish(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } -struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +static struct list_head * +lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) { + struct list_head *header_list; + header_list = &root->header_list; + if (root->lookup) + header_list = root->lookup(root, namespaces); + return header_list; +} + +struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev) +{ + struct ctl_table_root *root; + struct list_head *header_list; struct ctl_table_header *head; struct list_head *tmp; + spin_lock(&sysctl_lock); if (prev) { + head = prev; tmp = &prev->ctl_entry; unuse_table(prev); goto next; @@ -1298,14 +1408,38 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) spin_unlock(&sysctl_lock); return head; next: + root = head->root; tmp = tmp->next; - if (tmp == &root_table_header.ctl_entry) - break; + header_list = lookup_header_list(root, namespaces); + if (tmp != header_list) + continue; + + do { + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + if (root == &sysctl_table_root) + goto out; + header_list = lookup_header_list(root, namespaces); + } while (list_empty(header_list)); + tmp = header_list->next; } +out: spin_unlock(&sysctl_lock); return NULL; } +struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +{ + return __sysctl_head_next(current->nsproxy, prev); +} + +void register_sysctl_root(struct ctl_table_root *root) +{ + spin_lock(&sysctl_lock); + list_add_tail(&root->root_list, &sysctl_table_root.root_list); + spin_unlock(&sysctl_lock); +} + #ifdef CONFIG_SYSCTL_SYSCALL int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -1341,10 +1475,15 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) if (copy_from_user(&tmp, args, sizeof(tmp))) return -EFAULT; + error = deprecated_sysctl_warning(&tmp); + if (error) + goto out; + lock_kernel(); error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, tmp.newval, tmp.newlen); unlock_kernel(); +out: return error; } #endif /* CONFIG_SYSCTL_SYSCALL */ @@ -1365,7 +1504,7 @@ static int test_perm(int mode, int op) return -EACCES; } -int sysctl_perm(ctl_table *table, int op) +int sysctl_perm(struct ctl_table *table, int op) { int error; error = security_sysctl(table, op); @@ -1378,7 +1517,7 @@ int sysctl_perm(ctl_table *table, int op) static int parse_table(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, - ctl_table *table) + struct ctl_table *table) { int n; repeat: @@ -1409,13 +1548,12 @@ repeat: } /* Perform the actual read/write of a sysctl table entry. */ -int do_sysctl_strategy (ctl_table *table, +int do_sysctl_strategy (struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int op = 0, rc; - size_t len; if (oldval) op |= 004; @@ -1436,25 +1574,10 @@ int do_sysctl_strategy (ctl_table *table, /* If there is no strategy routine, or if the strategy returns * zero, proceed with automatic r/w */ if (table->data && table->maxlen) { - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } + rc = sysctl_data(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; } return 0; } @@ -1471,20 +1594,25 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) static __init int sysctl_init(void) { + int err; sysctl_set_parent(NULL, root_table); + err = sysctl_check_table(current->nsproxy, root_table); return 0; } core_initcall(sysctl_init); /** - * register_sysctl_table - register a sysctl hierarchy + * __register_sysctl_paths - register a sysctl hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. An entry with a ctl_name of 0 terminates the table. + * array. A completely 0 filled entry terminates the table. * - * The members of the &ctl_table structure are used as follows: + * The members of the &struct ctl_table structure are used as follows: * * ctl_name - This is the numeric sysctl value used by sysctl(2). The number * must be unique within that level of sysctl @@ -1545,21 +1673,99 @@ core_initcall(sysctl_init); * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *register_sysctl_table(ctl_table * table) +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) { - struct ctl_table_header *tmp; - tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); - if (!tmp) + struct list_head *header_list; + struct ctl_table_header *header; + struct ctl_table *new, **prevp; + unsigned int n, npath; + + /* Count the path components */ + for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) + ; + + /* + * For each path component, allocate a 2-element ctl_table array. + * The first array element will be filled with the sysctl entry + * for this, the second will be the sentinel (ctl_name == 0). + * + * We allocate everything in one go so that we don't have to + * worry about freeing additional memory in unregister_sysctl_table. + */ + header = kzalloc(sizeof(struct ctl_table_header) + + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + if (!header) return NULL; - tmp->ctl_table = table; - INIT_LIST_HEAD(&tmp->ctl_entry); - tmp->used = 0; - tmp->unregistering = NULL; - sysctl_set_parent(NULL, table); + + new = (struct ctl_table *) (header + 1); + + /* Now connect the dots */ + prevp = &header->ctl_table; + for (n = 0; n < npath; ++n, ++path) { + /* Copy the procname */ + new->procname = path->procname; + new->ctl_name = path->ctl_name; + new->mode = 0555; + + *prevp = new; + prevp = &new->child; + + new += 2; + } + *prevp = table; + header->ctl_table_arg = table; + + INIT_LIST_HEAD(&header->ctl_entry); + header->used = 0; + header->unregistering = NULL; + header->root = root; + sysctl_set_parent(NULL, header->ctl_table); + if (sysctl_check_table(namespaces, header->ctl_table)) { + kfree(header); + return NULL; + } spin_lock(&sysctl_lock); - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + header_list = lookup_header_list(root, namespaces); + list_add_tail(&header->ctl_entry, header_list); spin_unlock(&sysctl_lock); - return tmp; + + return header; +} + +/** + * register_sysctl_table_path - register a sysctl table hierarchy + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) +{ + return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + path, table); +} + +/** + * register_sysctl_table - register a sysctl table hierarchy + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_table(struct ctl_table *table) +{ + static const struct ctl_path null_path[] = { {} }; + + return register_sysctl_paths(null_path, table); } /** @@ -1572,6 +1778,10 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table) void unregister_sysctl_table(struct ctl_table_header * header) { might_sleep(); + + if (header == NULL) + return; + spin_lock(&sysctl_lock); start_unregistering(header); spin_unlock(&sysctl_lock); @@ -1579,7 +1789,13 @@ void unregister_sysctl_table(struct ctl_table_header * header) } #else /* !CONFIG_SYSCTL */ -struct ctl_table_header *register_sysctl_table(ctl_table * table) +struct ctl_table_header *register_sysctl_table(struct ctl_table * table) +{ + return NULL; +} + +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) { return NULL; } @@ -1672,7 +1888,7 @@ static int _proc_do_string(void* data, int maxlen, int write, * * Returns 0 on success. */ -int proc_dostring(ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return _proc_do_string(table->data, table->maxlen, write, filp, @@ -1699,7 +1915,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, return 0; } -static int __do_proc_dointvec(void *tbl_data, ctl_table *table, +static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, @@ -1809,7 +2025,7 @@ static int __do_proc_dointvec(void *tbl_data, ctl_table *table, #undef TMPBUFLEN } -static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, +static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -1833,7 +2049,7 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec(ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, @@ -1869,28 +2085,10 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, return 0; } -/* - * init may raise the set. - */ - -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int op; - - if (write && !capable(CAP_SYS_MODULE)) { - return -EPERM; - } - - op = is_init(current) ? OP_SET : OP_AND; - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, - do_proc_dointvec_bset_conv,&op); -} - /* * Taint values can only be increased */ -static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, +static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int op; @@ -1949,7 +2147,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { @@ -1960,7 +2158,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, do_proc_dointvec_minmax_conv, ¶m); } -static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, +static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, @@ -2065,7 +2263,7 @@ static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, #undef TMPBUFLEN } -static int do_proc_doulongvec_minmax(ctl_table *table, int write, +static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, @@ -2093,7 +2291,7 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); @@ -2117,7 +2315,7 @@ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2210,7 +2408,7 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, @@ -2233,7 +2431,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, @@ -2257,21 +2455,21 @@ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; pid_t tmp; int r; - tmp = pid_nr(cad_pid); + tmp = pid_vnr(cad_pid); r = __do_proc_dointvec(&tmp, table, write, filp, buffer, lenp, ppos, NULL, NULL); @@ -2288,55 +2486,49 @@ static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, #else /* CONFIG_PROC_FS */ -int proc_dostring(ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec(ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2353,8 +2545,42 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, * General sysctl support routines */ +/* The generic sysctl data routine (used if no strategy routine supplied) */ +int sysctl_data(struct ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + size_t len; + + /* Get out of I don't have a variable */ + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, table->data, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + if (newval && newlen) { + if (newlen > table->maxlen) + newlen = table->maxlen; + + if (copy_from_user(table->data, newval, newlen)) + return -EFAULT; + } + return 1; +} + /* The generic string strategy routine: */ -int sysctl_string(ctl_table *table, int __user *name, int nlen, +int sysctl_string(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2400,7 +2626,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, * are between the minimum and maximum values given in the arrays * table->extra1 and table->extra2, respectively. */ -int sysctl_intvec(ctl_table *table, int __user *name, int nlen, +int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2436,7 +2662,7 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2470,7 +2696,7 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2510,59 +2736,50 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, asmlinkage long sys_sysctl(struct __sysctl_args __user *args) { - static int msg_count; struct __sysctl_args tmp; - int name[CTL_MAXNAME]; - int i; + int error; - /* Read in the sysctl name for better debug message logging */ if (copy_from_user(&tmp, args, sizeof(tmp))) return -EFAULT; - if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME) - return -ENOTDIR; - for (i = 0; i < tmp.nlen; i++) - if (get_user(name[i], tmp.name + i)) - return -EFAULT; - /* Ignore accesses to kernel.version */ - if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) - goto out; + error = deprecated_sysctl_warning(&tmp); - if (msg_count < 5) { - msg_count++; - printk(KERN_INFO - "warning: process `%s' used the removed sysctl " - "system call with ", current->comm); - for (i = 0; i < tmp.nlen; i++) - printk("%d.", name[i]); - printk("\n"); - } -out: + /* If no error reading the parameters then just -ENOSYS ... */ + if (!error) + error = -ENOSYS; + + return error; +} + +int sysctl_data(struct ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ return -ENOSYS; } -int sysctl_string(ctl_table *table, int __user *name, int nlen, +int sysctl_string(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_intvec(ctl_table *table, int __user *name, int nlen, +int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2571,6 +2788,37 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, #endif /* CONFIG_SYSCTL_SYSCALL */ +static int deprecated_sysctl_warning(struct __sysctl_args *args) +{ + static int msg_count; + int name[CTL_MAXNAME]; + int i; + + /* Check args->nlen. */ + if (args->nlen < 0 || args->nlen > CTL_MAXNAME) + return -ENOTDIR; + + /* Read in the sysctl name for better debug message logging */ + for (i = 0; i < args->nlen; i++) + if (get_user(name[i], args->name + i)) + return -EFAULT; + + /* Ignore accesses to kernel.version */ + if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) + return 0; + + if (msg_count < 5) { + msg_count++; + printk(KERN_INFO + "warning: process `%s' used the deprecated sysctl " + "system call with ", current->comm); + for (i = 0; i < args->nlen; i++) + printk("%d.", name[i]); + printk("\n"); + } + return 0; +} + /* * No sense putting this after each symbol definition, twice, * exception granted :-) @@ -2584,8 +2832,10 @@ EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); EXPORT_SYMBOL(register_sysctl_table); +EXPORT_SYMBOL(register_sysctl_paths); EXPORT_SYMBOL(sysctl_intvec); EXPORT_SYMBOL(sysctl_jiffies); EXPORT_SYMBOL(sysctl_ms_jiffies); EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_data); EXPORT_SYMBOL(unregister_sysctl_table);