err.no Git - linux-2.6/blobdiff - arch/x86/mm/discontig_32.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shaggy...
[linux-2.6] / arch / x86 / mm / discontig_32.c
index accc7c6c57fc1dace92139a5981460f7a213822a..5dfef9fa061a653669c79bdd69382169dc68561c 100644 (file)
@@ -76,13 +76,13 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 {
        unsigned long pfn;
 
-       printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
+       printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n",
                        nid, start, end);
        printk(KERN_DEBUG "  Setting physnode_map array to node %d for pfns:\n", nid);
        printk(KERN_DEBUG "  ");
        for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
                physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
-               printk(KERN_CONT "%ld ", pfn);
+               printk(KERN_CONT "%lx ", pfn);
        }
        printk(KERN_CONT "\n");
 }
@@ -100,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 #endif
 
 extern unsigned long find_max_low_pfn(void);
-extern void add_one_highpage_init(struct page *, int, int);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -118,7 +117,7 @@ static unsigned long kva_pages;
  */
 int __init get_memcfg_numa_flat(void)
 {
-       printk("NUMA - single node, flat memory mode\n");
+       printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
 
        node_start_pfn[0] = 0;
        node_end_pfn[0] = max_pfn;
@@ -157,32 +156,32 @@ static void __init propagate_e820_map_node(int nid)
  */
 static void __init allocate_pgdat(int nid)
 {
-       if (nid && node_has_online_mem(nid))
+       char buf[16];
+
+       if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
                NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
        else {
                unsigned long pgdat_phys;
                pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
-                                (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
+                                max_pfn_mapped<<PAGE_SHIFT,
                                 sizeof(pg_data_t),
                                 PAGE_SIZE);
                NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-               reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
-                             "NODE_DATA");
+               memset(buf, 0, sizeof(buf));
+               sprintf(buf, "NODE_DATA %d",  nid);
+               reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
        }
        printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
                nid, (unsigned long)NODE_DATA(nid));
 }
 
-#ifdef CONFIG_DISCONTIGMEM
 /*
- * In the discontig memory model, a portion of the kernel virtual area (KVA)
- * is reserved and portions of nodes are mapped using it. This is to allow
- * node-local memory to be allocated for structures that would normally require
- * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers
- * should be prepared to allocate from the bootmem allocator instead. This KVA
- * mechanism is incompatible with SPARSEMEM as it makes assumptions about the
- * layout of memory that are broken if alloc_remap() succeeds for some of the
- * map and fails for others
+ * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
+ * virtual address space (KVA) is reserved and portions of nodes are mapped
+ * using it. This is to allow node-local memory to be allocated for
+ * structures that would normally require ZONE_NORMAL. The memory is
+ * allocated with alloc_remap() and callers should be prepared to allocate
+ * from the bootmem allocator instead.
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
@@ -204,7 +203,7 @@ void *alloc_remap(int nid, unsigned long size)
        return allocation;
 }
 
-void __init remap_numa_kva(void)
+static void __init remap_numa_kva(void)
 {
        void *vaddr;
        unsigned long pfn;
@@ -230,14 +229,14 @@ static unsigned long calculate_numa_remap_pages(void)
        unsigned long size, reserve_pages = 0;
 
        for_each_online_node(nid) {
-               u64 node_end_target;
-               u64 node_end_final;
+               u64 node_kva_target;
+               u64 node_kva_final;
 
                /*
                 * The acpi/srat node info can show hot-add memroy zones
                 * where memory could be added but not currently present.
                 */
-               printk("node %d pfn: [%lx - %lx]\n",
+               printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
                        nid, node_start_pfn[nid], node_end_pfn[nid]);
                if (node_start_pfn[nid] > max_pfn)
                        continue;
@@ -254,44 +253,48 @@ static unsigned long calculate_numa_remap_pages(void)
                /* now the roundup is correct, convert to PAGE_SIZE pages */
                size = size * PTRS_PER_PTE;
 
-               node_end_target = round_down(node_end_pfn[nid] - size,
+               node_kva_target = round_down(node_end_pfn[nid] - size,
                                                 PTRS_PER_PTE);
-               node_end_target <<= PAGE_SHIFT;
+               node_kva_target <<= PAGE_SHIFT;
                do {
-                       node_end_final = find_e820_area(node_end_target,
+                       node_kva_final = find_e820_area(node_kva_target,
                                        ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
                                                ((u64)size)<<PAGE_SHIFT,
                                                LARGE_PAGE_BYTES);
-                       node_end_target -= LARGE_PAGE_BYTES;
-               } while (node_end_final == -1ULL &&
-                        (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
+                       node_kva_target -= LARGE_PAGE_BYTES;
+               } while (node_kva_final == -1ULL &&
+                        (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
 
-               if (node_end_final == -1ULL)
+               if (node_kva_final == -1ULL)
                        panic("Can not get kva ram\n");
 
-               printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
-                               size, nid);
                node_remap_size[nid] = size;
                node_remap_offset[nid] = reserve_pages;
                reserve_pages += size;
-               printk("Shrinking node %d from %ld pages to %lld pages\n",
-                       nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT);
+               printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
+                                 " node %d at %llx\n",
+                               size, nid, node_kva_final>>PAGE_SHIFT);
 
                /*
                 *  prevent kva address below max_low_pfn want it on system
                 *  with less memory later.
                 *  layout will be: KVA address , KVA RAM
+                *
+                *  we are supposed to only record the one less than max_low_pfn
+                *  but we could have some hole in high memory, and it will only
+                *  check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
+                *  to use it as free.
+                *  So reserve_early here, hope we don't run out of that array
                 */
-               if ((node_end_final>>PAGE_SHIFT) < max_low_pfn)
-                       reserve_early(node_end_final,
-                                     node_end_final+(((u64)size)<<PAGE_SHIFT),
-                                     "KVA RAM");
-
-               node_end_pfn[nid] = node_end_final>>PAGE_SHIFT;
-               node_remap_start_pfn[nid] = node_end_pfn[nid];
-               shrink_active_range(nid, node_end_pfn[nid]);
+               reserve_early(node_kva_final,
+                             node_kva_final+(((u64)size)<<PAGE_SHIFT),
+                             "KVA RAM");
+
+               node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
+               remove_active_range(nid, node_remap_start_pfn[nid],
+                                        node_remap_start_pfn[nid] + size);
        }
-       printk("Reserving total of %ld pages for numa KVA remap\n",
+       printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
                        reserve_pages);
        return reserve_pages;
 }
@@ -305,35 +308,15 @@ static void init_remap_allocator(int nid)
        node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
                ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 
-       printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+       printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
                (ulong) node_remap_start_vaddr[nid],
                (ulong) node_remap_end_vaddr[nid]);
 }
-#else
-void *alloc_remap(int nid, unsigned long size)
-{
-       return NULL;
-}
-
-static unsigned long calculate_numa_remap_pages(void)
-{
-       return 0;
-}
-
-static void init_remap_allocator(int nid)
-{
-}
-
-void __init remap_numa_kva(void)
-{
-}
-#endif /* CONFIG_DISCONTIGMEM */
 
-extern void setup_bootmem_allocator(void);
-unsigned long __init setup_memory(void)
+void __init initmem_init(unsigned long start_pfn,
+                                 unsigned long end_pfn)
 {
        int nid;
-       unsigned long system_start_pfn, system_max_low_pfn;
        long kva_target_pfn;
 
        /*
@@ -344,17 +327,10 @@ unsigned long __init setup_memory(void)
         * and ZONE_HIGHMEM.
         */
 
-       /* call find_max_low_pfn at first, it could update max_pfn */
-       system_max_low_pfn = max_low_pfn = find_max_low_pfn();
-
-       remove_all_active_ranges();
        get_memcfg_numa();
 
        kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
-       /* partially used pages are not usable - thus round upwards */
-       system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
-
        kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
        do {
                kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
@@ -367,9 +343,9 @@ unsigned long __init setup_memory(void)
        if (kva_start_pfn == -1UL)
                panic("Can not get kva space\n");
 
-       printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
+       printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
                kva_start_pfn, max_low_pfn);
-       printk("max_pfn = %ld\n", max_pfn);
+       printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
 
        /* avoid clash with initrd */
        reserve_early(kva_start_pfn<<PAGE_SHIFT,
@@ -377,62 +353,50 @@ unsigned long __init setup_memory(void)
                     "KVA PG");
 #ifdef CONFIG_HIGHMEM
        highstart_pfn = highend_pfn = max_pfn;
-       if (max_pfn > system_max_low_pfn)
-               highstart_pfn = system_max_low_pfn;
+       if (max_pfn > max_low_pfn)
+               highstart_pfn = max_low_pfn;
        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
               pages_to_mb(highend_pfn - highstart_pfn));
        num_physpages = highend_pfn;
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-       num_physpages = system_max_low_pfn;
-       high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
+       num_physpages = max_low_pfn;
+       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
        printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
-                       pages_to_mb(system_max_low_pfn));
-       printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", 
-                       min_low_pfn, max_low_pfn, highstart_pfn);
+                       pages_to_mb(max_low_pfn));
+       printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n",
+                       max_low_pfn, highstart_pfn);
 
-       printk("Low memory ends at vaddr %08lx\n",
+       printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
                        (ulong) pfn_to_kaddr(max_low_pfn));
        for_each_online_node(nid) {
                init_remap_allocator(nid);
 
                allocate_pgdat(nid);
        }
-       printk("High memory starts at vaddr %08lx\n",
+       remap_numa_kva();
+
+       printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
                        (ulong) pfn_to_kaddr(highstart_pfn));
        for_each_online_node(nid)
                propagate_e820_map_node(nid);
 
-       memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
+       for_each_online_node(nid)
+               memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
        NODE_DATA(0)->bdata = &node0_bdata;
        setup_bootmem_allocator();
-       return max_low_pfn;
-}
-
-void __init zone_sizes_init(void)
-{
-       unsigned long max_zone_pfns[MAX_NR_ZONES];
-       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-       max_zone_pfns[ZONE_DMA] =
-               virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-       max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
-       max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-#endif
-
-       free_area_init_nodes(max_zone_pfns);
-       return;
 }
 
-void __init set_highmem_pages_init(int bad_ppro) 
+void __init set_highmem_pages_init(void)
 {
 #ifdef CONFIG_HIGHMEM
        struct zone *zone;
-       struct page *page;
+       int nid;
 
        for_each_zone(zone) {
-               unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
+               unsigned long zone_start_pfn, zone_end_pfn;
 
                if (!is_highmem(zone))
                        continue;
@@ -440,16 +404,12 @@ void __init set_highmem_pages_init(int bad_ppro)
                zone_start_pfn = zone->zone_start_pfn;
                zone_end_pfn = zone_start_pfn + zone->spanned_pages;
 
-               printk("Initializing %s for node %d (%08lx:%08lx)\n",
-                               zone->name, zone_to_nid(zone),
-                               zone_start_pfn, zone_end_pfn);
+               nid = zone_to_nid(zone);
+               printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
+                               zone->name, nid, zone_start_pfn, zone_end_pfn);
 
-               for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
-                       if (!pfn_valid(node_pfn))
-                               continue;
-                       page = pfn_to_page(node_pfn);
-                       add_one_highpage_init(page, node_pfn, bad_ppro);
-               }
+               add_highpages_with_active_regions(nid, zone_start_pfn,
+                                zone_end_pfn);
        }
        totalram_pages += totalhigh_pages;
 #endif
@@ -482,3 +442,4 @@ int memory_add_physaddr_to_nid(u64 addr)
 
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+