X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=arch%2Fx86%2Fmm%2Ffault_64.c;h=dd26e680a43166ca9abe0ced6390989edddf4349;hb=3c1df68b848b39270752ff8d4b956cc4a4dce0f6;hp=dcf430bb62ee92a91f2a1f6ef9092a059115178a;hpb=c4aba4a8ec795124394bc79e3e8dbbc319338a98;p=linux-2.6 diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index dcf430bb62..dd26e680a4 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -39,10 +39,10 @@ * bit 3 == 1 means use of reserved bit detected * bit 4 == 1 means fault was an instruction fetch */ -#define PF_PROT (1<<0) +#define PF_PROT (1<<0) #define PF_WRITE (1<<1) -#define PF_USER (1<<2) -#define PF_RSVD (1<<3) +#define PF_USER (1<<2) +#define PF_RSVD (1<<3) #define PF_INSTR (1<<4) static inline int notify_page_fault(struct pt_regs *regs) @@ -64,83 +64,6 @@ static inline int notify_page_fault(struct pt_regs *regs) #endif } -#ifdef CONFIG_X86_32 -/* - * Return EIP plus the CS segment base. The segment limit is also - * adjusted, clamped to the kernel/user address space (whichever is - * appropriate), and returned in *eip_limit. - * - * The segment is checked, because it might have been changed by another - * task between the original faulting instruction and here. - * - * If CS is no longer a valid code segment, or if EIP is beyond the - * limit, or if it is a kernel address when CS is not a kernel segment, - * then the returned value will be greater than *eip_limit. - * - * This is slow, but is very rarely executed. - */ -static inline unsigned long get_segment_eip(struct pt_regs *regs, - unsigned long *eip_limit) -{ - unsigned long ip = regs->ip; - unsigned seg = regs->cs & 0xffff; - u32 seg_ar, seg_limit, base, *desc; - - /* Unlikely, but must come before segment checks. */ - if (unlikely(regs->flags & VM_MASK)) { - base = seg << 4; - *eip_limit = base + 0xffff; - return base + (ip & 0xffff); - } - - /* The standard kernel/user address space limit. */ - *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; - - /* By far the most common cases. */ - if (likely(SEGMENT_IS_FLAT_CODE(seg))) - return ip; - - /* Check the segment exists, is within the current LDT/GDT size, - that kernel/user (ring 0..3) has the appropriate privilege, - that it's a code segment, and get the limit. */ - __asm__("larl %3,%0; lsll %3,%1" - : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); - if ((~seg_ar & 0x9800) || ip > seg_limit) { - *eip_limit = 0; - return 1; /* So that returned ip > *eip_limit. */ - } - - /* Get the GDT/LDT descriptor base. - When you look for races in this code remember that - LDT and other horrors are only used in user space. */ - if (seg & (1<<2)) { - /* Must lock the LDT while reading it. */ - mutex_lock(¤t->mm->context.lock); - desc = current->mm->context.ldt; - desc = (void *)desc + (seg & ~7); - } else { - /* Must disable preemption while reading the GDT. */ - desc = (u32 *)get_cpu_gdt_table(get_cpu()); - desc = (void *)desc + (seg & ~7); - } - - /* Decode the code segment base from the descriptor */ - base = get_desc_base((struct desc_struct *)desc); - - if (seg & (1<<2)) - mutex_unlock(¤t->mm->context.lock); - else - put_cpu(); - - /* Adjust EIP and segment limit, and clamp at the kernel limit. - It's legitimate for segments to wrap at 0xffffffff. */ - seg_limit += base; - if (seg_limit < *eip_limit && seg_limit >= base) - *eip_limit = seg_limit; - return ip + base; -} -#endif - /* * X86_32 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. 
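For reference only (this paragraph and the sketch below are not part of the patch): the hunk above spells out the hardware page-fault error-code bits that the PF_* masks name. The standalone, userspace C sketch that follows shows how those bits combine; describe_fault() and main() are hypothetical helpers, and only the bit meanings are taken from the comment in the hunk above.

#include <stdio.h>

/* Hardware page-fault error-code bits, as documented in the hunk above. */
#define PF_PROT		(1 << 0)	/* 0: no page found, 1: protection fault */
#define PF_WRITE	(1 << 1)	/* 0: read access,   1: write access */
#define PF_USER		(1 << 2)	/* 0: kernel mode,   1: user mode */
#define PF_RSVD		(1 << 3)	/* use of a reserved bit was detected */
#define PF_INSTR	(1 << 4)	/* the fault was an instruction fetch */

/* Hypothetical helper: print a human-readable view of an error code. */
static void describe_fault(unsigned long error_code)
{
	printf("%s on %s access from %s mode%s%s\n",
	       (error_code & PF_PROT)  ? "protection fault" : "not-present page",
	       (error_code & PF_WRITE) ? "write" : "read",
	       (error_code & PF_USER)  ? "user" : "kernel",
	       (error_code & PF_RSVD)  ? ", reserved bit set" : "",
	       (error_code & PF_INSTR) ? ", instruction fetch" : "");
}

int main(void)
{
	describe_fault(PF_USER | PF_WRITE);	/* user-mode write to a missing page */
	describe_fault(PF_PROT | PF_INSTR);	/* kernel instruction fetch denied by protection */
	return 0;
}

A user-mode write to an unmapped page, for instance, arrives with PF_USER | PF_WRITE set and PF_PROT clear.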
@@ -161,7 +84,6 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char *max_instr; #ifdef CONFIG_X86_32 - unsigned long limit; if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 6)) { /* Catch an obscure case of prefetch inside an NX page. */ @@ -170,30 +92,23 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr, } else { return 0; } - instr = (unsigned char *)get_segment_eip(regs, &limit); #else /* If it was a exec fault ignore */ if (error_code & PF_INSTR) return 0; - instr = (unsigned char __user *)convert_rip_to_linear(current, regs); #endif + instr = (unsigned char *)convert_ip_to_linear(current, regs); max_instr = instr + 15; -#ifdef CONFIG_X86_64 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; -#endif while (scan_more && instr < max_instr) { unsigned char opcode; unsigned char instr_hi; unsigned char instr_lo; -#ifdef CONFIG_X86_32 - if (instr > (unsigned char *)limit) - break; -#endif if (probe_kernel_address(instr, opcode)) break; @@ -235,10 +150,7 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; -#ifdef CONFIG_X86_32 - if (instr > (unsigned char *)limit) - break; -#endif + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && @@ -311,6 +223,7 @@ KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" KERN_ERR "******* Please consider a BIOS update.\n" KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +#endif /* Workaround for K8 erratum #93 & buggy BIOS. BIOS SMM functions are required to use a specific workaround @@ -318,10 +231,12 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; A lot of BIOS that didn't get tested properly miss this. The OS sees this as a page fault with the upper 32bits of RIP cleared. Try to work around it here. - Note we only handle faults in kernel here. */ - + Note we only handle faults in kernel here. + Does nothing for X86_32 + */ static int is_errata93(struct pt_regs *regs, unsigned long address) { +#ifdef CONFIG_X86_64 static int warned; if (address != regs->ip) return 0; @@ -337,9 +252,9 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) regs->ip = address; return 1; } +#endif return 0; } -#endif static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) @@ -366,6 +281,26 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, */ static int vmalloc_fault(unsigned long address) { +#ifdef CONFIG_X86_32 + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. 
+ */ + pgd_paddr = read_cr3(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + return 0; +#else pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; pmd_t *pmd, *pmd_ref; @@ -409,6 +344,7 @@ static int vmalloc_fault(unsigned long address) if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) BUG(); return 0; +#endif } int show_unhandled_signals = 1; @@ -443,6 +379,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, si_code = SEGV_MAPERR; + if (notify_page_fault(regs)) + return; /* * We fault-in kernel-space virtual memory on-demand. The @@ -468,8 +406,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (vmalloc_fault(address) >= 0) return; } - if (notify_page_fault(regs)) - return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -477,9 +413,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_page_fault(regs)) - return; - if (likely(regs->flags & X86_EFLAGS_IF)) local_irq_enable(); @@ -531,8 +464,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (error_code & PF_USER) { - /* Allow userspace just enough access below the stack pointer - * to let the 'enter' instruction work. + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter + * and pusha to work. ("enter $65535,$31" pushes + * 32 pointers and then decrements %sp by 65535.) */ if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) goto bad_area; @@ -578,6 +514,17 @@ good_area: tsk->maj_flt++; else tsk->min_flt++; + +#ifdef CONFIG_X86_32 + /* + * Did it hit the DOS screen memory VA from vm86 mode? + */ + if (v8086_mode(regs)) { + unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; + } +#endif up_read(&mm->mmap_sem); return; @@ -613,10 +560,16 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->ip, - regs->sp, error_code); +#ifdef CONFIG_X86_32 + "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", +#else + "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", +#endif + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, regs->ip, + regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); } tsk->thread.cr2 = address; @@ -655,7 +608,7 @@ no_context: else printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at %016lx RIP: \n" KERN_ALERT, address); - printk_address(regs->ip); + printk_address(regs->ip, 1); dump_pagetable(address); tsk->thread.cr2 = address; tsk->thread.trap_no = 14; @@ -677,7 +630,7 @@ out_of_memory: goto again; } printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) + if (error_code & PF_USER) do_group_exit(SIGKILL); goto no_context; @@ -700,10 +653,12 @@ LIST_HEAD(pgd_list); void vmalloc_sync_all(void) { - /* Note that races in the updates of insync and start aren't - problematic: - insync can only get set bits added, and updates to start are only - improving performance (without affecting correctness if undone). 
*/ + /* + * Note that races in the updates of insync and start aren't + * problematic: insync can only get set bits added, and updates to + * start are only improving performance (without affecting correctness + * if undone). + */ static DECLARE_BITMAP(insync, PTRS_PER_PGD); static unsigned long start = VMALLOC_START & PGDIR_MASK; unsigned long address;
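For reference only (not part of the patch): the reworked comment in do_page_fault() earlier in this diff explains the stack-expansion cushion — "enter $65535,$31" pushes 32 pointers and then decrements %sp by 65535, so accesses up to 65536 + 32 * sizeof(unsigned long) bytes below %sp must still be allowed. The standalone sketch below only restates that arithmetic; below_stack_cushion() and the sample stack pointer are hypothetical, while the constants mirror the check in the hunk.

#include <assert.h>

/*
 * Worst case legitimately below the old stack pointer, per the comment in
 * do_page_fault(): "enter $65535,$31" pushes 32 pointers and then
 * decrements %sp by 65535 bytes.
 */
#define ENTER_FRAME_BYTES	65535UL
#define ENTER_PUSHED_PTRS	32UL

/* Mirror of the patch's check: a fault too far below %sp is a bad access. */
static int below_stack_cushion(unsigned long address, unsigned long sp)
{
	return address + 65536 + 32 * sizeof(unsigned long) < sp;
}

int main(void)
{
	unsigned long sp = 0x7fff0000UL;	/* arbitrary sample stack pointer */
	/* Deepest access "enter $65535,$31" can make: still inside the cushion. */
	unsigned long deepest = sp - ENTER_PUSHED_PTRS * sizeof(unsigned long)
				   - ENTER_FRAME_BYTES;

	assert(!below_stack_cushion(deepest, sp));
	/* One page further down falls outside the cushion and is rejected. */
	assert(below_stack_cushion(deepest - 4096, sp));
	return 0;
}

Anything deeper than that cushion is treated as a bad access, exactly as the check added in the hunk does.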