/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%lu pages of RAM\n", total);
        printk(KERN_INFO "%lu reserved pages\n", reserved);
        printk(KERN_INFO "%lu pages shared\n", shared);
        printk(KERN_INFO "%lu pages swap cached\n", cached);
}

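/* Set in mem_init() once the bootmem allocator has been retired. */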
int after_bootmem;

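/*
 * Allocate a zeroed, page-aligned page for use as a page table: from the
 * bootmem allocator during early boot, from the buddy allocator afterwards.
 */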
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

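/*
 * Install a single kernel mapping of @vaddr to @phys with protection @prot,
 * allocating any missing intermediate page-table levels via spp_getpage().
 */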
static __init void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

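/*
 * Return a zeroed page for early page-table construction and store its
 * physical address in *phys. Before mem_init() the page is taken from the
 * table area located by find_early_table_space() and temporarily mapped
 * with early_ioremap(); afterwards it comes from get_zeroed_page().
 */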
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys  = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{

        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

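        /* Number of 2MB PMD entries needed to cover [addr, addr + size), rounded up. */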
        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

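/* Create 2MB direct-mapping PMD entries for [address, end); entries that are
   already populated are left untouched, and during the initial boot pass any
   entries past 'end' are cleared. */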
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

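/* Populate PUD entries for the direct mapping of [addr, end), allocating PMD
   pages with alloc_low_page() and filling them via phys_pmd_init(); already
   present entries are extended through phys_pmd_update(). */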
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

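/* Estimate how much memory the kernel direct-mapping page tables will need,
   locate a free e820 area for them and record it in table_start/table_end. */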
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped.  Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
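/* Non-NUMA zone setup: size the DMA/DMA32/NORMAL zones and build the
   sparse memmap. */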
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
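/* Make a hot-added page usable: clear its reserved bit, reset its refcount
   and hand it to the buddy allocator, updating the global page counts. */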
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is added always to NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

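/* Final memory accounting: release bootmem to the buddy allocator, count
   reserved pages and register the /proc/kcore regions. */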
void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear_bss() has already cleared the empty_zero_page */

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                                 VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

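/* Free and poison the init pages in [begin, end); for addresses above
   __START_KERNEL_map the kernel mapping is dropped as well. */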
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

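/* Mark a physical range as reserved with the bootmem allocator (per-node
   under NUMA) and account any pages taken from the DMA zone. */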
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

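/* Walk the kernel page tables by hand to check whether @addr is backed by
   a valid pfn; handles both 2MB and 4kB mappings. */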
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif