2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
12 * entry.S contains the system-call and fault low-level handling routines.
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals or fork/exec et al.
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all registers saved.
27 * - schedule it carefully for the final hardware.
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/segment.h>
35 #include <asm/cache.h>
36 #include <asm/errno.h>
37 #include <asm/dwarf2.h>
38 #include <asm/calling.h>
39 #include <asm/offset.h>
41 #include <asm/unistd.h>
42 #include <asm/thread_info.h>
43 #include <asm/hw_irq.h>
48 #define preempt_stop cli
51 #define retint_kernel retint_restore_args
55 * C code is not supposed to know about undefined top of stack. Every time
56 * a C function with a pt_regs argument is called from the SYSCALL based
57 * fast path FIXUP_TOP_OF_STACK is needed.
58 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
62 /* %rsp:at FRAMEEND */
63 .macro FIXUP_TOP_OF_STACK tmp
64 movq %gs:pda_oldrsp,\tmp
66 movq $__USER_DS,SS(%rsp)
67 movq $__USER_CS,CS(%rsp)
69 movq R11(%rsp),\tmp /* get eflags */
70 movq \tmp,EFLAGS(%rsp)
73 .macro RESTORE_TOP_OF_STACK tmp,offset=0
74 movq RSP-\offset(%rsp),\tmp
75 movq \tmp,%gs:pda_oldrsp
76 movq EFLAGS-\offset(%rsp),\tmp
77 movq \tmp,R11-\offset(%rsp)
80 .macro FAKE_STACK_FRAME child_rip
81 /* push in order ss, rsp, eflags, cs, rip */
84 CFI_ADJUST_CFA_OFFSET 8
86 CFI_ADJUST_CFA_OFFSET 8
88 pushq $(1<<9) /* eflags - interrupts on */
89 CFI_ADJUST_CFA_OFFSET 8
90 pushq $__KERNEL_CS /* cs */
91 CFI_ADJUST_CFA_OFFSET 8
92 pushq \child_rip /* rip */
93 CFI_ADJUST_CFA_OFFSET 8
95 pushq %rax /* orig rax */
96 CFI_ADJUST_CFA_OFFSET 8
99 .macro UNFAKE_STACK_FRAME
101 CFI_ADJUST_CFA_OFFSET -(6*8)
104 .macro CFI_DEFAULT_STACK
105 CFI_ADJUST_CFA_OFFSET (SS)
106 CFI_OFFSET r15,R15-SS
107 CFI_OFFSET r14,R14-SS
108 CFI_OFFSET r13,R13-SS
109 CFI_OFFSET r12,R12-SS
110 CFI_OFFSET rbp,RBP-SS
111 CFI_OFFSET rbx,RBX-SS
112 CFI_OFFSET r11,R11-SS
113 CFI_OFFSET r10,R10-SS
116 CFI_OFFSET rax,RAX-SS
117 CFI_OFFSET rcx,RCX-SS
118 CFI_OFFSET rdx,RDX-SS
119 CFI_OFFSET rsi,RSI-SS
120 CFI_OFFSET rdi,RDI-SS
121 CFI_OFFSET rsp,RSP-SS
122 CFI_OFFSET rip,RIP-SS
125 * A newly forked process directly context switches into this.
132 GET_THREAD_INFO(%rcx)
133 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
137 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
138 je int_ret_from_sys_call
139 testl $_TIF_IA32,threadinfo_flags(%rcx)
140 jnz int_ret_from_sys_call
141 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
142 jmp ret_from_sys_call
145 call syscall_trace_leave
146 GET_THREAD_INFO(%rcx)
151 * System call entry. Upto 6 arguments in registers are supported.
153 * SYSCALL does not save anything on the stack and does not change the
159 * rax system call number
161 * rcx return address for syscall/sysret, C arg3
164 * r10 arg3 (--> moved to rcx for C)
167 * r11 eflags for syscall/sysret, temporary for C
168 * r12-r15,rbp,rbx saved by C code, not touched.
170 * Interrupts are off on entry.
171 * Only called from user space.
173 * XXX if we had a free scratch register we could save the RSP into the stack frame
174 * and report it properly in ps. Unfortunately we haven't.
180 movq %rsp,%gs:pda_oldrsp
181 movq %gs:pda_kernelstack,%rsp
184 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
185 movq %rcx,RIP-ARGOFFSET(%rsp)
186 GET_THREAD_INFO(%rcx)
187 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
189 cmpq $__NR_syscall_max,%rax
192 call *sys_call_table(,%rax,8) # XXX: rip relative
193 movq %rax,RAX-ARGOFFSET(%rsp)
195 * Syscall return path ending with SYSRET (fast path)
196 * Has incomplete stack frame and undefined top of stack.
198 .globl ret_from_sys_call
200 movl $_TIF_WORK_MASK,%edi
203 GET_THREAD_INFO(%rcx)
205 movl threadinfo_flags(%rcx),%edx
208 movq RIP-ARGOFFSET(%rsp),%rcx
209 RESTORE_ARGS 0,-ARG_SKIP,1
210 movq %gs:pda_oldrsp,%rsp
214 /* Handle reschedules */
215 /* edx: work, edi: workmask */
217 bt $TIF_NEED_RESCHED,%edx
225 /* Handle a signal */
228 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
231 /* Really a signal */
232 /* edx: work flags (arg3) */
233 leaq do_notify_resume(%rip),%rax
234 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
235 xorl %esi,%esi # oldset -> arg2
236 call ptregscall_common
237 1: movl $_TIF_NEED_RESCHED,%edi
240 /* Do syscall tracing */
243 movq $-ENOSYS,RAX(%rsp)
244 FIXUP_TOP_OF_STACK %rdi
246 call syscall_trace_enter
247 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
249 cmpq $__NR_syscall_max,%rax
251 movq %r10,%rcx /* fixup for C */
252 call *sys_call_table(,%rax,8)
253 movq %rax,RAX-ARGOFFSET(%rsp)
256 call syscall_trace_leave
257 RESTORE_TOP_OF_STACK %rbx
259 jmp ret_from_sys_call
262 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
263 jmp ret_from_sys_call
266 * Syscall return path ending with IRET.
267 * Has correct top of stack, but partial stack frame.
269 ENTRY(int_ret_from_sys_call)
271 testl $3,CS-ARGOFFSET(%rsp)
272 je retint_restore_args
273 movl $_TIF_ALLWORK_MASK,%edi
274 /* edi: mask to check */
276 GET_THREAD_INFO(%rcx)
277 movl threadinfo_flags(%rcx),%edx
282 /* Either reschedule or signal or syscall exit tracking needed. */
283 /* First do a reschedule test. */
284 /* edx: work, edi: workmask */
286 bt $TIF_NEED_RESCHED,%edx
294 /* handle signals and tracing -- both require a full stack frame */
298 /* Check for syscall exit trace */
299 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
302 leaq 8(%rsp),%rdi # &ptregs -> arg1
303 call syscall_trace_leave
305 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
309 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
311 movq %rsp,%rdi # &ptregs -> arg1
312 xorl %esi,%esi # oldset -> arg2
313 call do_notify_resume
314 1: movl $_TIF_NEED_RESCHED,%edi
321 * Certain special system calls that need to save a complete full stack frame.
324 .macro PTREGSCALL label,func,arg
327 leaq \func(%rip),%rax
328 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
329 jmp ptregscall_common
332 PTREGSCALL stub_clone, sys_clone, %r8
333 PTREGSCALL stub_fork, sys_fork, %rdi
334 PTREGSCALL stub_vfork, sys_vfork, %rdi
335 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
336 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
337 PTREGSCALL stub_iopl, sys_iopl, %rsi
339 ENTRY(ptregscall_common)
342 CFI_ADJUST_CFA_OFFSET -8
345 FIXUP_TOP_OF_STACK %r11
347 RESTORE_TOP_OF_STACK %r11
351 CFI_ADJUST_CFA_OFFSET 8
358 CFI_ADJUST_CFA_OFFSET -8
361 FIXUP_TOP_OF_STACK %r11
363 GET_THREAD_INFO(%rcx)
364 bt $TIF_IA32,threadinfo_flags(%rcx)
366 RESTORE_TOP_OF_STACK %r11
373 CFI_ADJUST_CFA_OFFSET REST_SKIP
376 jmp int_ret_from_sys_call
380 * sigreturn is special because it needs to restore all registers on return.
381 * This cannot be done with SYSRET, so use the IRET return path instead.
383 ENTRY(stub_rt_sigreturn)
388 FIXUP_TOP_OF_STACK %r11
389 call sys_rt_sigreturn
390 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
392 jmp int_ret_from_sys_call
396 * Interrupt entry/exit.
398 * Interrupt entry points save only callee clobbered registers in fast path.
400 * Entry runs with interrupts off.
403 /* 0(%rsp): interrupt number */
404 .macro interrupt func
406 CFI_DEF_CFA rsp,(SS-RDI)
407 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
408 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
410 #ifdef CONFIG_DEBUG_INFO
414 * Setup a stack frame pointer. This allows gdb to trace
415 * back to the original stack.
418 CFI_DEF_CFA_REGISTER rbp
421 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
426 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
427 movq %gs:pda_irqstackptr,%rax
429 pushq %rdi # save old stack
433 ENTRY(common_interrupt)
435 /* 0(%rsp): oldrsp-ARGOFFSET */
439 subl $1,%gs:pda_irqcount
440 #ifdef CONFIG_DEBUG_INFO
443 leaq ARGOFFSET(%rdi),%rsp
445 GET_THREAD_INFO(%rcx)
446 testl $3,CS-ARGOFFSET(%rsp)
449 /* Interrupt came from user space */
451 * Has a correct top of stack, but a partial stack frame
452 * %rcx: thread info. Interrupts off.
454 retint_with_reschedule:
455 movl $_TIF_WORK_MASK,%edi
457 movl threadinfo_flags(%rcx),%edx
469 .section __ex_table,"a"
470 .quad iret_label,bad_iret
473 /* force a signal here? this matches i386 behaviour */
474 /* running with kernel gs */
476 movq $-9999,%rdi /* better code? */
480 /* edi: workmask, edx: work */
482 bt $TIF_NEED_RESCHED,%edx
488 GET_THREAD_INFO(%rcx)
493 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
497 movq $-1,ORIG_RAX(%rsp)
498 xorq %rsi,%rsi # oldset
499 movq %rsp,%rdi # &pt_regs
500 call do_notify_resume
503 movl $_TIF_NEED_RESCHED,%edi
504 GET_THREAD_INFO(%rcx)
507 #ifdef CONFIG_PREEMPT
508 /* Returning to kernel space. Check if we need preemption */
509 /* rcx: threadinfo. interrupts off. */
512 cmpl $0,threadinfo_preempt_count(%rcx)
513 jnz retint_restore_args
514 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
515 jnc retint_restore_args
516 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
517 jnc retint_restore_args
518 call preempt_schedule_irq
526 .macro apicinterrupt num,func
533 ENTRY(thermal_interrupt)
534 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
537 ENTRY(reschedule_interrupt)
538 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
540 ENTRY(invalidate_interrupt)
541 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
543 ENTRY(call_function_interrupt)
544 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
547 #ifdef CONFIG_X86_LOCAL_APIC
548 ENTRY(apic_timer_interrupt)
549 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
551 ENTRY(error_interrupt)
552 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
554 ENTRY(spurious_interrupt)
555 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
559 * Exception entry points.
562 pushq $0 /* push error code/oldrax */
563 pushq %rax /* push real oldrax to the rdi slot */
568 .macro errorentry sym
574 /* error code is on the stack already */
575 /* handle NMI like exceptions that can happen everywhere */
576 .macro paranoidentry sym
580 movl $MSR_GS_BASE,%ecx
587 movq ORIG_RAX(%rsp),%rsi
588 movq $-1,ORIG_RAX(%rsp)
593 * Exception entry point. This expects an error code/orig_rax on the stack
594 * and the exception handler in %rax.
598 CFI_DEF_CFA rsp,(SS-RDI)
599 CFI_REL_OFFSET rsp,(RSP-RDI)
600 CFI_REL_OFFSET rip,(RIP-RDI)
601 /* rdi slot contains rax, oldrax contains error code */
604 CFI_ADJUST_CFA_OFFSET (14*8)
606 CFI_REL_OFFSET rsi,RSI
607 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
609 CFI_REL_OFFSET rdx,RDX
611 CFI_REL_OFFSET rcx,RCX
612 movq %rsi,10*8(%rsp) /* store rax */
613 CFI_REL_OFFSET rax,RAX
619 CFI_REL_OFFSET r10,R10
621 CFI_REL_OFFSET r11,R11
623 CFI_REL_OFFSET rbx,RBX
625 CFI_REL_OFFSET rbp,RBP
627 CFI_REL_OFFSET r12,R12
629 CFI_REL_OFFSET r13,R13
631 CFI_REL_OFFSET r14,R14
633 CFI_REL_OFFSET r15,R15
642 movq ORIG_RAX(%rsp),%rsi /* get error code */
643 movq $-1,ORIG_RAX(%rsp)
645 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
650 GET_THREAD_INFO(%rcx)
653 movl threadinfo_flags(%rcx),%edx
654 movl $_TIF_WORK_MASK,%edi
664 /* There are two places in the kernel that can potentially fault with
665 usergs. Handle them here. The exception handlers after
666 iret run with kernel gs again, so don't set the user space flag.
667 B stepping K8s sometimes report an truncated RIP for IRET
668 exceptions returning to compat mode. Check for these here too. */
669 leaq iret_label(%rip),%rbp
672 movl %ebp,%ebp /* zero extend */
675 cmpq $gs_change,RIP(%rsp)
679 /* Reload gs selector with exception handling */
680 /* edi: new selector */
687 2: mfence /* workaround */
692 .section __ex_table,"a"
694 .quad gs_change,bad_gs
697 /* running with kernelgs */
699 swapgs /* switch back to user gs */
706 * Create a kernel thread.
708 * C extern interface:
709 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
711 * asm input arguments:
712 * rdi: fn, rsi: arg, rdx: flags
716 FAKE_STACK_FRAME $child_rip
719 # rdi: flags, rsi: usp, rdx: will be &pt_regs
721 orq kernel_thread_flags(%rip),%rdi
734 * It isn't worth to check for reschedule here,
735 * so internally to the x86_64 port you can rely on kernel_thread()
736 * not to reschedule the child before returning, this avoids the need
737 * of hacks for example to fork off the per-CPU idle tasks.
738 * [Hopefully no generic code relies on the reschedule -AK]
748 * Here we are in the child and the registers are set as they were
749 * at kernel_thread() invocation in the parent.
759 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
761 * C extern interface:
762 * extern long execve(char *name, char **argv, char **envp)
764 * asm input arguments:
765 * rdi: name, rsi: argv, rdx: envp
767 * We want to fallback into:
768 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
770 * do_sys_execve asm fallback arguments:
771 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
781 je int_ret_from_sys_call
788 errorentry do_page_fault
790 ENTRY(coprocessor_error)
791 zeroentry do_coprocessor_error
793 ENTRY(simd_coprocessor_error)
794 zeroentry do_simd_coprocessor_error
796 ENTRY(device_not_available)
797 zeroentry math_state_restore
799 /* runs on exception stack */
803 CFI_ADJUST_CFA_OFFSET 8
804 paranoidentry do_debug
805 /* switch back to process stack to restore the state ptrace touched */
808 jnz paranoid_userspace
812 /* runs on exception stack */
816 CFI_ADJUST_CFA_OFFSET 8
818 /* ebx: no swapgs flag */
820 testl %ebx,%ebx /* swapgs needed? */
830 GET_THREAD_INFO(%rcx)
831 movl threadinfo_flags(%rcx),%edx
832 testl $_TIF_NEED_RESCHED,%edx
834 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
843 xorl %esi,%esi /* oldset */
844 movq %rsp,%rdi /* &pt_regs */
845 call do_notify_resume
853 zeroentry do_overflow
859 zeroentry do_invalid_op
861 ENTRY(coprocessor_segment_overrun)
862 zeroentry do_coprocessor_segment_overrun
865 zeroentry do_reserved
867 /* runs on exception stack */
870 paranoidentry do_double_fault
873 jnz paranoid_userspace
878 errorentry do_invalid_TSS
880 ENTRY(segment_not_present)
881 errorentry do_segment_not_present
883 /* runs on exception stack */
886 paranoidentry do_stack_segment
889 jnz paranoid_userspace
893 ENTRY(general_protection)
894 errorentry do_general_protection
896 ENTRY(alignment_check)
897 errorentry do_alignment_check
900 zeroentry do_divide_error
902 ENTRY(spurious_interrupt_bug)
903 zeroentry do_spurious_interrupt_bug
905 #ifdef CONFIG_X86_MCE
906 /* runs on exception stack */
910 CFI_ADJUST_CFA_OFFSET 8
911 paranoidentry do_machine_check
917 zeroentry do_call_debug