/*
* linux/arch/x86_64/entry.S
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
*
* $Id: entry.S,v 1.98 2003/05/12 14:39:00 ak Exp $
*/
/*
* entry.S contains the system-call and fault low-level handling routines.
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
*
* Normal syscalls and interrupts don't save a full stack frame, this is
* only done for PT_TRACESYS, signals or fork/exec et al.
*
* TODO:
* - schedule it carefully for the final hardware.
*
*/
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/hw_irq.h>
.code64
#define PDAREF(field) %gs:field
/*
* C code is not supposed to know about partial frames. Every time a C function
* that looks at the pt_regs is called these two macros are executed around it.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
*/
/* %rsp:at FRAMEEND */
/* Complete a partial (SYSCALL fast path) frame into a full pt_regs:
 * fill in the user RSP (stashed in pda_oldrsp at syscall entry), SS, CS
 * and EFLAGS (which SYSCALL left in the R11 slot).  RCX is poisoned with
 * -1 because the user return address already sits in RIP.
 * \tmp is clobbered as scratch. */
.macro FIXUP_TOP_OF_STACK tmp
movq PDAREF(pda_oldrsp),\tmp # user stack pointer saved at syscall entry
movq \tmp,RSP(%rsp)
movq $__USER_DS,SS(%rsp)
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp) /* contains return address, already in RIP */
movq R11(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS(%rsp)
.endm
/* Inverse of FIXUP_TOP_OF_STACK: propagate a (possibly ptrace-modified)
 * RSP back into pda_oldrsp and EFLAGS back into the R11 slot, which the
 * SYSRET path reloads into eflags.  \tmp is clobbered; \offset adjusts
 * for a frame that is only partially built (e.g. ARGOFFSET). */
.macro RESTORE_TOP_OF_STACK tmp,offset=0
movq RSP-\offset(%rsp),\tmp
movq \tmp,PDAREF(pda_oldrsp)
movq EFLAGS-\offset(%rsp),\tmp
movq \tmp,R11-\offset(%rsp)
.endm
/*
* A newly forked process directly context switches into this.
*/
# First code executed by a newly forked task after the context switch.
# Notifies the scheduler (schedule_tail), informs a tracer if the task is
# being ptraced, then exits either via the SYSRET fast path or, for
# kernel threads and 32bit (IA32) tasks, via the IRET path.
ENTRY(ret_from_fork)
movq %rax,%rdi /* return value of __switch_to -> prev task */
call schedule_tail
GET_CURRENT(%rcx)
testb $PT_TRACESYS,tsk_ptrace(%rcx) # traced? notify the tracer first
jnz 2f
1:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
jz int_ret_from_sys_call
testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx) # 32bit task must IRET
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
2:
movq %rsp,%rdi # pt_regs pointer for the tracer callback
call syscall_trace
GET_CURRENT(%rcx)
jmp 1b
/*
* System call entry. Upto 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
* stack pointer. Gets the per CPU area from the hidden GS MSR and finds the
* current kernel stack.
*/
/*
* Register setup:
* rax system call number
* rdi arg0
* rcx return address for syscall/sysret, C arg3
* rsi arg1
* rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
* r12-r15,rbp,rbx saved by C code, not touched.
*
* Interrupts are off on entry.
* Only called from user space.
*/
# SYSCALL entry: switch to the kernel GS base and kernel stack, save a
# partial frame (SAVE_ARGS), then dispatch through sys_call_table.
# Register conventions are documented in the block comment above.
ENTRY(system_call)
swapgs # get the kernel GS base for per-CPU (PDA) access
movq %rsp,PDAREF(pda_oldrsp) # stash the user stack pointer
movq PDAREF(pda_kernelstack),%rsp
sti # SYSCALL entered with interrupts off; safe to enable now
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp) # syscall number (kept for tracing/restart)
movq %rcx,RIP-ARGOFFSET(%rsp) # SYSCALL put the user return address in rcx
GET_CURRENT(%rcx)
testl $PT_TRACESYS,tsk_ptrace(%rcx)
jne tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx # 4th syscall arg: r10 -> C ABI register rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp) # store return value into the frame
# SYSRET return path: with interrupts disabled, loop on need_resched and
# sigpending until both are clear, then return to user space via SYSRET.
.globl ret_from_sys_call
ret_from_sys_call:
sysret_with_reschedule:
GET_CURRENT(%rcx)
cli # flag checks must not race with interrupts
cmpq $0,tsk_need_resched(%rcx)
jne sysret_reschedule
cmpl $0,tsk_sigpending(%rcx)
jne sysret_signal
sysret_restore_args:
movq RIP-ARGOFFSET(%rsp),%rcx # SYSRET returns to the address in rcx
RESTORE_ARGS 0,-ARG_SKIP,1
movq PDAREF(pda_oldrsp),%rsp # back onto the user stack
swapgs
sysretq
sysret_signal:
sti
xorl %esi,%esi # oldset
leaq -ARGOFFSET(%rsp),%rdi # regs
leaq do_signal(%rip),%rax
call ptregscall_common # do_signal needs a full pt_regs frame
sysret_signal_test:
GET_CURRENT(%rcx)
cli
cmpq $0,tsk_need_resched(%rcx)
je sysret_restore_args
sti
call schedule
jmp sysret_signal_test
sysret_reschedule:
sti
call schedule
jmp sysret_with_reschedule
# Slow path for ptraced tasks: build a full frame, notify the tracer
# before and after the syscall, and reload the argument registers in
# between because the tracer may have modified the saved frame.
tracesys:
SAVE_REST
movq $-ENOSYS,RAX(%rsp) # default return value in the frame
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja tracesys_done # out-of-range number: keep the -ENOSYS default
tracesys_call: /* backtrace marker */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
tracesys_done: /* backtrace marker */
SAVE_REST
movq %rsp,%rdi
call syscall_trace
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
jmp ret_from_sys_call
# Out-of-range syscall number: clear the saved number (presumably so the
# bad call is not restarted -- TODO confirm) and return -ENOSYS.
badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
/*
* Syscall return path ending with IRET.
* This can be either 64bit calls that require restoring of all registers
* (impossible with sysret) or 32bit calls.
*/
ENTRY(int_ret_from_sys_call)
intret_test_kernel:
testl $3,CS-ARGOFFSET(%rsp) # returning to kernel mode?
je retint_restore_args # then skip swapgs and signal/resched checks
intret_with_reschedule:
GET_CURRENT(%rcx)
cli # flag checks must not race with interrupts
cmpq $0,tsk_need_resched(%rcx)
jne intret_reschedule
cmpl $0,tsk_sigpending(%rcx)
jne intret_signal
jmp retint_restore_args_swapgs
intret_reschedule:
sti
call schedule
jmp intret_with_reschedule
intret_signal:
sti
SAVE_REST # do_signal needs the full register set
xorq %rsi,%rsi # oldset -> arg2
movq %rsp,%rdi # &ptregs -> arg1
call do_signal
RESTORE_REST
intret_signal_test:
GET_CURRENT(%rcx)
cli
cmpq $0,tsk_need_resched(%rcx)
je retint_restore_args_swapgs
sti
call schedule
# RED-PEN: can we lose signals here?
jmp intret_signal_test
/*
* Certain special system calls that need to save a complete stack frame.
*/
/* Generate a stub \label for a syscall whose C handler \func needs the
 * full pt_regs frame: load the handler address into %rax and share the
 * frame setup/teardown in ptregscall_common. */
.macro PTREGSCALL label,func
.globl \label
\label:
leaq \func(%rip),%rax
jmp ptregscall_common
.endm
PTREGSCALL stub_clone, sys_clone
PTREGSCALL stub_fork, sys_fork
PTREGSCALL stub_vfork, sys_vfork
PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
PTREGSCALL stub_sigaltstack, sys_sigaltstack
PTREGSCALL stub_iopl, sys_iopl
# Common tail for the PTREGSCALL stubs (and sysret_signal): completes the
# frame around the handler passed in %rax.  The stub's return address is
# popped into r11 and parked in r15 -- callee-saved, and its original
# value is preserved in the frame by SAVE_REST -- so it survives the call.
ENTRY(ptregscall_common)
popq %r11 # return address of the stub's caller
SAVE_REST
movq %r11, %r15 # keep return address across the C call
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
RESTORE_REST
pushq %r11 # put the return address back
ret
# execve() syscall stub: sys_execve needs a full pt_regs frame.  A 64bit
# task unwinds the frame and returns through the saved return address;
# a 32bit (IA32) task must leave via the IRET path so that all registers
# are restored.  Mirrors ptregscall_common, with the extra IA32 check.
ENTRY(stub_execve)
popq %r11 # return address of the stub's caller
SAVE_REST
movq %r11, %r15 # keep return address across the C call (callee-saved)
FIXUP_TOP_OF_STACK %r11
call sys_execve
GET_CURRENT(%rcx)
testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
jnz exec_32bit
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
RESTORE_REST
pushq %r11 # explicit operand size, consistent with ptregscall_common
ret
exec_32bit:
movq %rax,RAX(%rsp) # store return value into the full frame
RESTORE_REST
jmp int_ret_from_sys_call
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
addq $8, %rsp # drop the stub return address; exit is via IRET, not ret
SAVE_REST
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
jmp int_ret_from_sys_call
/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers, except
* for signals again.
*
* Entry runs with interrupts off.
*/
/* 0(%rsp): interrupt number */
ENTRY(common_interrupt)
testl $3,16(%rsp) # CS of the interrupted frame (vector+RIP below it): from kernel?
je 1f
swapgs # came from user space: switch to the kernel GS base
1: cld
#ifdef CONFIG_X86_REMOTE_DEBUG
SAVE_ALL
movq %rsp,%rdi
#else
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
#endif
addl $1,PDAREF(pda_irqcount) # XXX: should be merged with irq.c irqcount
movq PDAREF(pda_irqstackptr),%rax
cmoveq %rax,%rsp # switch to the irq stack when count hit 0 (not nested)
pushq %rdi # save old stack
call do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
# Interrupt exit: pop the saved pre-interrupt stack pointer, leave the
# irq stack, and either return straight to the kernel or run the usual
# user-return checks.  The final iretq can fault on bad user state and
# is covered by an exception-table entry pointing at bad_iret.
ENTRY(ret_from_intr)
cli
popq %rdi # old stack pointer saved by common_interrupt/IRQ_ENTER
subl $1,PDAREF(pda_irqcount)
leaq ARGOFFSET(%rdi),%rsp # back onto the interrupted task's stack
testl $3,CS(%rdi) # from kernel?
je retint_restore_args
/* Interrupt came from user space */
retint_with_reschedule:
GET_CURRENT(%rcx)
cmpq $0,tsk_need_resched(%rcx)
jne retint_reschedule
cmpl $0,tsk_sigpending(%rcx)
jne retint_signal
retint_restore_args_swapgs:
swapgs # back to the user GS base
retint_restore_args:
RESTORE_ARGS 0,8
iret_label:
iretq
.section __ex_table,"a"
.align 8
.quad iret_label,bad_iret # a faulting iretq resumes at bad_iret
.previous
.section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
bad_iret:
movq $-9999,%rdi /* better code? */
jmp do_exit
.previous
retint_signal:
sti
SAVE_REST # do_signal needs the full register set
movq $-1,ORIG_RAX(%rsp) # mark the frame as not a syscall
xorq %rsi,%rsi # oldset
movq %rsp,%rdi # &pt_regs
call do_signal
RESTORE_REST
retint_signal_test:
cli
GET_CURRENT(%rcx)
cmpq $0,tsk_need_resched(%rcx)
je retint_restore_args_swapgs
sti
call schedule
jmp retint_signal_test
retint_reschedule:
sti
call schedule
cli
jmp retint_with_reschedule
/* IF:off, stack contains irq number on origrax */
/* Interrupt prologue used by BUILD_SMP_INTERRUPT: hand-save the caller
 * clobbered registers (same layout as SAVE_ARGS), swapgs if we came from
 * user mode, and switch to the per-CPU irq stack on the first (non
 * nested) interrupt.  Expects IF off and the vector number on the
 * stack in the origrax slot. */
.macro IRQ_ENTER
cld
pushq %rdi
pushq %rsi
pushq %rdx
pushq %rcx
pushq %rax
pushq %r8
pushq %r9
pushq %r10
pushq %r11
leaq -48(%rsp),%rdi # rdi = frame base, as common_interrupt's handler arg
testl $3,136(%rdi) # CS slot of the interrupted frame: from user?
je 1f
swapgs
1: addl $1,%gs:pda_irqcount
movq %gs:pda_irqstackptr,%rax
cmoveq %rax,%rsp # switch to the irq stack when count hit 0 (not nested)
pushq %rdi # remember old stack for ret_from_intr
.endm
/* Generate an entry point \x for fixed vector \v that calls the C
 * handler smp_\x().  The vector is pushed biased by -256 -- presumably
 * so it is negative and distinguishable from a syscall number in the
 * origrax slot; TODO confirm against irq.c. */
.macro BUILD_SMP_INTERRUPT x,v
ENTRY(\x)
push $\v-256
IRQ_ENTER
call smp_\x
jmp ret_from_intr
.endm
#ifdef CONFIG_SMP
BUILD_SMP_INTERRUPT reschedule_interrupt,RESCHEDULE_VECTOR
BUILD_SMP_INTERRUPT invalidate_interrupt,INVALIDATE_TLB_VECTOR
BUILD_SMP_INTERRUPT call_function_interrupt,CALL_FUNCTION_VECTOR
#endif
#ifdef CONFIG_X86_LOCAL_APIC
BUILD_SMP_INTERRUPT apic_timer_interrupt,LOCAL_TIMER_VECTOR
BUILD_SMP_INTERRUPT error_interrupt,ERROR_APIC_VECTOR
BUILD_SMP_INTERRUPT spurious_interrupt,SPURIOUS_APIC_VECTOR
#endif
/*
* Exception entry points.
*/
/* Exception without a CPU-pushed error code: fake a zero error code so
 * the frame layout matches errorentry, then enter via error_entry with
 * the C handler in %rax. */
.macro zeroentry sym
pushq $0 /* push error code/oldrax */
pushq %rax /* push real oldrax to the rdi slot */
leaq \sym(%rip),%rax
jmp error_entry
.endm
/* Exception where the CPU already pushed an error code. */
.macro errorentry sym
pushq %rax
leaq \sym(%rip),%rax
jmp error_entry
.endm
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
# Common exception entry: save the remaining registers to build a full
# pt_regs frame, swapgs if we came from user mode, call the handler from
# %rax with (pt_regs*, error_code), then run the usual return checks.
# r15d is used as the "entered from kernel, do not swapgs on exit" flag.
ALIGN
error_entry:
/* rdi slot contains rax, oldrax contains error code */
pushq %rsi
movq 8(%rsp),%rsi /* load rax */
pushq %rdx
pushq %rcx
pushq %rsi /* store rax */
pushq %r8
pushq %r9
pushq %r10
pushq %r11
cld
SAVE_REST
xorl %r15d,%r15d # r15d = 0: assume user mode (swapgs on exit)
testl $3,CS(%rsp)
je error_kernelspace
swapgs
error_action:
movq %rdi,RDI(%rsp) # rdi was not saved above; store it into its slot
movq %rsp,%rdi # pt_regs pointer -> arg1
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp) # mark the frame as not a syscall
call *%rax
/* r15d: swapgs flag */
error_exit:
testl %r15d,%r15d
jnz error_restore # kernel fault: skip signal/resched checks and swapgs
error_test:
cli
GET_CURRENT(%rcx)
cmpq $0,tsk_need_resched(%rcx)
jne error_reschedule
cmpl $0,tsk_sigpending(%rcx)
jne error_signal
error_restore_swapgs:
swapgs # back to the user GS base
error_restore:
RESTORE_REST
jmp retint_restore_args
error_reschedule:
sti
call schedule
jmp error_test
error_signal:
sti
xorq %rsi,%rsi # oldset; SAVE_REST already ran in error_entry
movq %rsp,%rdi
call do_signal
error_signal_test:
GET_CURRENT(%rcx)
cli
cmpq $0,tsk_need_resched(%rcx)
je error_restore_swapgs
sti
call schedule
jmp error_signal_test
error_kernelspace:
incl %r15d # remember: entered from kernel mode
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. */
cmpq $iret_label,RIP(%rsp)
je 1f
cmpq $gs_change,RIP(%rsp)
jne error_action
/* iret_label and gs_change are handled by exception handlers
and the exit points run with kernelgs again */
1: swapgs
jmp error_action
/* Reload gs selector with exception handling */
/* edi: new selector */
# void load_gs_index(int selector) -- reload %gs with exception handling.
# edi: new selector.  Runs with interrupts off between the swapgs pair;
# the mov into %gs may fault on a bad selector and is covered by an
# exception-table entry that resumes at bad_gs with a null selector.
ENTRY(load_gs_index)
pushf # preserve caller's IF
cli
swapgs # expose the user GS base while %gs is reloaded
gs_change:
movl %edi,%gs # may fault; fixed up at bad_gs
2: mfence /* workaround for opteron errata #88 */
swapgs # back to the kernel GS base
popf
ret
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
# Fault fixup: load the null selector instead and retry the exit path.
# NOTE(review): the extra swapgs pairs with error_kernelspace's gs_change
# handling in error_entry -- verify against that path.
bad_gs:
swapgs
xorl %eax,%eax
movl %eax,%gs
jmp 2b
/*
* Create a kernel thread.
*
* C extern interface:
* extern long arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
*
* asm input arguments:
* rdi: fn, rsi: arg, rdx: flags
*/
ENTRY(arch_kernel_thread)
FAKE_STACK_FRAME $child_rip # fake frame: the child "returns" to child_rip
SAVE_ALL
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq $CLONE_VM, %rdi # kernel threads share the kernel address space
movq $-1, %rsi # usp = -1 (special value for do_fork -- TODO confirm semantics)
movq %rsp, %rdx # &pt_regs
# clone now
call do_fork
# save retval on the stack so it's popped before `ret`
movq %rax, RAX(%rsp)
/*
* It isn't worth to check for reschedule here,
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning, this avoids the need
* of hacks for example to fork off the per-CPU idle tasks.
* [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
# Child entry point: the registers still hold the kernel_thread()
# arguments from the parent; call fn(arg), then exit with code 0.
child_rip:
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
movq %rdi, %rax # rax = fn
movq %rsi, %rdi # rdi = arg
call *%rax
# exit
xorq %rdi, %rdi # exit code 0
call do_exit
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
* C extern interface:
* extern long execve(char *name, char **argv, char **envp)
*
* asm input arguments:
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
* extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
*
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
*/
ENTRY(execve)
FAKE_STACK_FRAME $0
SAVE_ALL
call sys_execve
movq %rax, RAX(%rsp) # store return value into the frame
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call # success: must IRET to load the new user state
RESTORE_ARGS
UNFAKE_STACK_FRAME
ret # failure: plain return to the kernel caller
# Exception entry stubs: dispatch to the C handlers via the
# zeroentry/errorentry macros defined above.
ENTRY(page_fault)
errorentry do_page_fault
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
# #NM (device not available): dispatch to math_emulate if CR0.EM (bit 2)
# is set, otherwise math_state_restore.  Exits through error_exit, so
# r15d carries the "came from kernel, skip swapgs" flag it expects.
ENTRY(device_not_available)
pushq $-1 # fake orig_rax: not a syscall
SAVE_ALL
xorl %r15d,%r15d # assume user mode (swapgs on exit)
testl $3,CS(%rsp)
jz 1f
swapgs
2: movq %cr0,%rax
leaq math_state_restore(%rip),%rcx
leaq math_emulate(%rip),%rbx
testl $0x4,%eax # CR0.EM set?
cmoveq %rcx,%rbx # EM clear -> restore the real FPU state
call *%rbx
jmp error_exit
1: incl %r15d # kernel mode: no swapgs on exit
jmp 2b
# #DB: debug exception, no CPU error code.
ENTRY(debug)
zeroentry do_debug
# NMI entry: cannot assume GS state because an NMI may land between the
# swapgs pair of another entry path, so probe the GS base MSR directly.
ENTRY(nmi)
pushq $-1 # fake orig_rax: not a syscall
SAVE_ALL
/* NMI could happen inside the critical section of a swapgs,
so it is needed to use this expensive way to check.
Rely on arch_prctl forbidding user space from setting a negative
GS. Only the kernel value is negative. */
movl $MSR_GS_BASE,%ecx
rdmsr
xorl %ebx,%ebx # ebx = 1 if we performed a swapgs (must undo on exit)
testl %edx,%edx # high half of GS base negative => already kernel GS
js 1f
swapgs
movl $1,%ebx
1: movq %rsp,%rdi # pt_regs pointer -> arg1
call do_nmi
cli
testl %ebx,%ebx
jz error_restore # we did not swapgs: leave GS as-is
swapgs
jmp error_restore
# Remaining exception entry stubs; zeroentry fakes a zero error code,
# errorentry is used where the CPU pushes one itself.
ENTRY(int3)
zeroentry do_int3
ENTRY(overflow)
zeroentry do_overflow
ENTRY(bounds)
zeroentry do_bounds
ENTRY(invalid_op)
zeroentry do_invalid_op
ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
ENTRY(reserved)
zeroentry do_reserved
ENTRY(double_fault)
errorentry do_double_fault
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
ENTRY(segment_not_present)
errorentry do_segment_not_present
ENTRY(stack_segment)
errorentry do_stack_segment
ENTRY(general_protection)
errorentry do_general_protection
ENTRY(alignment_check)
errorentry do_alignment_check
ENTRY(divide_error)
zeroentry do_divide_error
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
ENTRY(machine_check)
zeroentry do_machine_check
ENTRY(call_debug)
zeroentry do_call_debug
|