of kilobytes free. The VM uses this number to compute a pages_min
value for each lowmem zone in the system. Each lowmem zone gets
a number of reserved free pages based proportionally on its size.
+
+==============================================================
+
+percpu_pagelist_fraction
+
+This is the fraction of pages at most (high mark pcp->high) in each zone that
+are allocated for each per cpu page list. The min value for this is 8. It
+means that we don't allow more than 1/8th of pages in each zone to be
+allocated in any single per_cpu_pagelist. This entry only changes the value
+of hot per cpu pagelists. User can specify a number like 100 to allocate
+1/100th of each zone to each per cpu page list.
+
+The batch value of each per cpu pagelist is also updated as a result. It is
+set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
+
+The initial value is zero. Kernel does not use this value at boot time to set
+the high water marks for each per cpu page list.
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+ void __user *, size_t *, loff_t *);
#include <linux/topology.h>
/* Returns the number of the current Node. */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
+ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
};
extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
+extern int percpu_pagelist_fraction;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
static int minolduid;
+static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
+ {
+ .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
+ .procname = "percpu_pagelist_fraction",
+ .data = &percpu_pagelist_fraction,
+ .maxlen = sizeof(percpu_pagelist_fraction),
+ .mode = 0644,
+ .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_percpu_pagelist_fract,
+ },
#ifdef CONFIG_MMU
{
.ctl_name = VM_MAX_MAP_COUNT,
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
long nr_swap_pages;
+int percpu_pagelist_fraction;
static void fastcall free_hot_cold_page(struct page *page, int cold);
INIT_LIST_HEAD(&pcp->list);
}
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+ unsigned long high)
+{
+ struct per_cpu_pages *pcp;
+
+ pcp = &p->pcp[0]; /* hot list */
+ pcp->high = high;
+ pcp->batch = max(1UL, high/4);
+ if ((high/4) > (PAGE_SHIFT * 8))
+ pcp->batch = PAGE_SHIFT * 8;
+}
+
+
#ifdef CONFIG_NUMA
/*
* Boot pageset table. One per cpu which is going to be used for all
goto bad;
setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+
+ if (percpu_pagelist_fraction)
+ setup_pagelist_highmark(zone_pcp(zone, cpu),
+ (zone->present_pages / percpu_pagelist_fraction));
}
return 0;
return 0;
}
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+ struct zone *zone;
+ unsigned int cpu;
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ if (!write || (ret == -EINVAL))
+ return ret;
+ for_each_zone(zone) {
+ for_each_online_cpu(cpu) {
+ unsigned long high;
+ high = zone->present_pages / percpu_pagelist_fraction;
+ setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+ }
+ }
+ return 0;
+}
+
__initdata int hashdist = HASHDIST_DEFAULT;
#ifdef CONFIG_NUMA