From: Yasunori Goto Date: Sun, 21 Oct 2007 23:41:36 +0000 (-0700) Subject: memory hotplug: rearrange memory hotplug notifier X-Git-Tag: v2.6.24-rc1~108 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7b78d335ac15b10bbcb0397c635d7f0d569b0270;p=linux-2.6 memory hotplug: rearrange memory hotplug notifier The current memory notifier still has some defects. (Fortunately, nothing uses it.) This patch fixes and rearranges them. - Pass start_pfn, nr_pages, and the node id (when the node's status changes from/to a memoryless node) to callback functions. Callbacks can't do anything without this information. - Add a notification for the going-online status. It is necessary for creating per-node structures before the node's pages are available. - Move the GOING_OFFLINE status notification after page isolation. This is a good place for callbacks to return memory such as caches, because the returned pages are not used again. - Add CANCEL events for rolling back when an error occurs. - Delete the MEM_MAPPING_INVALID notification. It will not be used. - Fix the compile error of (un)register_memory_notifier(). 
Signed-off-by: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c41d0728ef..7868707c7e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf) return len; } -static inline int memory_notify(unsigned long val, void *v) +int memory_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&memory_chain, val, v); } @@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) break; case MEM_OFFLINE: mem->state = MEM_GOING_OFFLINE; - memory_notify(MEM_GOING_OFFLINE, NULL); start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; ret = remove_memory(start_paddr, PAGES_PER_SECTION << PAGE_SHIFT); @@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) mem->state = old_state; break; } - memory_notify(MEM_MAPPING_INVALID, NULL); break; default: printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", @@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) WARN_ON(1); ret = -EINVAL; } - /* - * For now, only notify on successful memory operations - */ - if (!ret) - memory_notify(action, NULL); return ret; } diff --git a/include/linux/memory.h b/include/linux/memory.h index 654ef55448..ec376e482a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -41,18 +41,15 @@ struct memory_block { #define MEM_ONLINE (1<<0) /* exposed to userspace */ #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ #define MEM_OFFLINE (1<<2) /* exposed to userspace */ +#define MEM_GOING_ONLINE (1<<3) +#define MEM_CANCEL_ONLINE (1<<4) +#define MEM_CANCEL_OFFLINE (1<<5) -/* - * All of these states are currently kernel-internal for notifying - * kernel components and architectures. 
- * - * For MEM_MAPPING_INVALID, all notifier chains with priority >0 - * are called before pfn_to_page() becomes invalid. The priority=0 - * entry is reserved for the function that actually makes - * pfn_to_page() stop working. Any notifiers that want to be called - * after that should have priority <0. - */ -#define MEM_MAPPING_INVALID (1<<3) +struct memory_notify { + unsigned long start_pfn; + unsigned long nr_pages; + int status_change_nid; +}; struct notifier_block; struct mem_section; @@ -69,12 +66,18 @@ static inline int register_memory_notifier(struct notifier_block *nb) static inline void unregister_memory_notifier(struct notifier_block *nb) { } +static inline int memory_notify(unsigned long val, void *v) +{ + return 0; +} #else +extern int register_memory_notifier(struct notifier_block *nb); +extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_new_memory(struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); - +extern int memory_notify(unsigned long val, void *v); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<= end_pfn); /* at least, alignment against pageblock is necessary */ @@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, we assume this for now. 
.*/ if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + + zone = page_zone(pfn_to_page(start_pfn)); + node = zone_to_nid(zone); + nr_pages = end_pfn - start_pfn; + /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) return ret; - nr_pages = end_pfn - start_pfn; + + arg.start_pfn = start_pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + if (nr_pages >= node_present_pages(node)) + arg.status_change_nid = node; + + ret = memory_notify(MEM_GOING_OFFLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) + goto failed_removal; + pfn = start_pfn; expire = jiffies + timeout; drain = 0; @@ -539,20 +577,24 @@ repeat: /* reset pagetype flags */ start_isolate_page_range(start_pfn, end_pfn); /* removal success */ - zone = page_zone(pfn_to_page(start_pfn)); zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; num_physpages -= offlined_pages; + vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + memory_notify(MEM_OFFLINE, &arg); return 0; failed_removal: printk(KERN_INFO "memory offlining %lx to %lx failed\n", start_pfn, end_pfn); + memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); + return ret; } #else