Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | /* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
ALIGN
memcpy_c:
CFI_STARTPROC
movq %rdi,%rax
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
rep movsq
movl %edx,%ecx
rep movsb
ret
CFI_ENDPROC
ENDPROC(memcpy_c)
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
pushq %rbx
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbx, 0
movq %rdi,%rax
movl %edx,%ecx
shrl $6,%ecx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
decl %ecx
movq (%rsi),%r11
movq 8(%rsi),%r8
movq %r11,(%rdi)
movq %r8,1*8(%rdi)
movq 2*8(%rsi),%r9
movq 3*8(%rsi),%r10
movq %r9,2*8(%rdi)
movq %r10,3*8(%rdi)
movq 4*8(%rsi),%r11
movq 5*8(%rsi),%r8
movq %r11,4*8(%rdi)
movq %r8,5*8(%rdi)
movq 6*8(%rsi),%r9
movq 7*8(%rsi),%r10
movq %r9,6*8(%rdi)
movq %r10,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
jnz .Lloop_64
.Lhandle_tail:
movl %edx,%ecx
andl $63,%ecx
shrl $3,%ecx
jz .Lhandle_7
.p2align 4
.Lloop_8:
decl %ecx
movq (%rsi),%r8
movq %r8,(%rdi)
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jnz .Lloop_8
.Lhandle_7:
movl %edx,%ecx
andl $7,%ecx
jz .Lende
.p2align 4
.Lloop_1:
movb (%rsi),%r8b
movb %r8b,(%rdi)
incq %rdi
incq %rsi
decl %ecx
jnz .Lloop_1
.Lende:
popq %rbx
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE rbx
ret
.Lfinal:
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad memcpy
.quad 1b
.byte X86_FEATURE_REP_GOOD
/* Replace only beginning, memcpy is used to apply alternatives, so it
* is silly to overwrite itself with nops - reboot is only outcome... */
.byte 2b - 1b
.byte 2b - 1b
.previous
|