aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/lib')
-rw-r--r--arch/x86_64/lib/Makefile2
-rw-r--r--arch/x86_64/lib/clear_page.S47
-rw-r--r--arch/x86_64/lib/copy_page.S53
-rw-r--r--arch/x86_64/lib/copy_user.S153
-rw-r--r--arch/x86_64/lib/csum-copy.S26
-rw-r--r--arch/x86_64/lib/getuser.S32
-rw-r--r--arch/x86_64/lib/iomap_copy.S10
-rw-r--r--arch/x86_64/lib/memcpy.S69
-rw-r--r--arch/x86_64/lib/memset.S79
-rw-r--r--arch/x86_64/lib/putuser.S32
-rw-r--r--arch/x86_64/lib/rwlock.S38
-rw-r--r--arch/x86_64/lib/thunk.S43
12 files changed, 363 insertions, 221 deletions
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
index ccef6ae747a..b78d4170fce 100644
--- a/arch/x86_64/lib/Makefile
+++ b/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o
lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
usercopy.o getuser.o putuser.o \
thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796..9a10a78bb4a 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* Zero a page.
* rdi page
*/
- .globl clear_page
- .p2align 4
-clear_page:
+ ALIGN
+clear_page_c:
+ CFI_STARTPROC
+ movl $4096/8,%ecx
+ xorl %eax,%eax
+ rep stosq
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page)
+
+ENTRY(clear_page)
+ CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -23,28 +35,25 @@ clear_page:
jnz .Lloop
nop
ret
-clear_page_end:
+ CFI_ENDPROC
+.Lclear_page_end:
+ENDPROC(clear_page)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad clear_page
- .quad clear_page_c
- .byte X86_FEATURE_REP_GOOD
- .byte clear_page_end-clear_page
- .byte clear_page_c_end-clear_page_c
- .previous
-
- .section .altinstr_replacement,"ax"
-clear_page_c:
- movl $4096/8,%ecx
- xorl %eax,%eax
- rep
- stosq
- ret
-clear_page_c_end:
+ .quad clear_page
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lclear_page_end - clear_page
+ .byte 2b - 1b
.previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7e..0ebb03b60e7 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+ ALIGN
+copy_page_c:
+ CFI_STARTPROC
+ movl $4096/8,%ecx
+ rep movsq
+ ret
+ CFI_ENDPROC
+ENDPROC(copy_page_c)
+
/* Don't use streaming store because it's better when the target
ends up in cache. */
/* Could vary the prefetch distance based on SMP/UP */
- .globl copy_page
- .p2align 4
-copy_page:
+ENTRY(copy_page)
+ CFI_STARTPROC
subq $3*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
+ CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
+ CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
+ CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
@@ -72,30 +88,33 @@ copy_page:
jnz .Loop2
movq (%rsp),%rbx
+ CFI_RESTORE rbx
movq 1*8(%rsp),%r12
+ CFI_RESTORE r12
movq 2*8(%rsp),%r13
+ CFI_RESTORE r13
addq $3*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -3*8
ret
+.Lcopy_page_end:
+ CFI_ENDPROC
+ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad copy_page
- .quad copy_page_c
- .byte X86_FEATURE_REP_GOOD
- .byte copy_page_c_end-copy_page_c
- .byte copy_page_c_end-copy_page_c
- .previous
-
- .section .altinstr_replacement,"ax"
-copy_page_c:
- movl $4096/8,%ecx
- rep
- movsq
- ret
-copy_page_c_end:
+ .quad copy_page
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lcopy_page_end - copy_page
+ .byte 2b - 1b
.previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b5..70bebd31040 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,56 +4,78 @@
* Functions to copy from and to user space.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
#define FIX_ALIGNMENT 1
- #include <asm/current.h>
- #include <asm/asm-offsets.h>
- #include <asm/thread_info.h>
- #include <asm/cpufeature.h>
+#include <asm/current.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/cpufeature.h>
-/* Standard copy_to_user with segment limit checking */
- .globl copy_to_user
- .p2align 4
-copy_to_user:
- GET_THREAD_INFO(%rax)
- movq %rdi,%rcx
- addq %rdx,%rcx
- jc bad_to_user
- cmpq threadinfo_addr_limit(%rax),%rcx
- jae bad_to_user
-2:
+ .macro ALTERNATIVE_JUMP feature,orig,alt
+0:
.byte 0xe9 /* 32bit jump */
- .long .Lcug-1f
+ .long \orig-1f /* by default jump to orig */
1:
-
.section .altinstr_replacement,"ax"
-3: .byte 0xe9 /* replacement jmp with 8 bit immediate */
- .long copy_user_generic_c-1b /* offset */
+2: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt-1b /* offset */ /* or alternatively to alt */
.previous
.section .altinstructions,"a"
.align 8
+ .quad 0b
.quad 2b
- .quad 3b
- .byte X86_FEATURE_REP_GOOD
+ .byte \feature /* when feature is set */
.byte 5
.byte 5
.previous
+ .endm
+
+/* Standard copy_to_user with segment limit checking */
+ENTRY(copy_to_user)
+ CFI_STARTPROC
+ GET_THREAD_INFO(%rax)
+ movq %rdi,%rcx
+ addq %rdx,%rcx
+ jc bad_to_user
+ cmpq threadinfo_addr_limit(%rax),%rcx
+ jae bad_to_user
+ xorl %eax,%eax /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+
+ENTRY(copy_user_generic)
+ CFI_STARTPROC
+ movl $1,%ecx /* set zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+
+ENTRY(__copy_from_user_inatomic)
+ CFI_STARTPROC
+ xorl %ecx,%ecx /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
/* Standard copy_from_user with segment limit checking */
- .globl copy_from_user
- .p2align 4
-copy_from_user:
+ENTRY(copy_from_user)
+ CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
- /* FALL THROUGH to copy_user_generic */
+ movl $1,%ecx /* set zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+ENDPROC(copy_from_user)
.section .fixup,"ax"
/* must zero dest */
bad_from_user:
+ CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
@@ -61,40 +83,32 @@ bad_from_user:
bad_to_user:
movl %edx,%eax
ret
+ CFI_ENDPROC
+END(bad_from_user)
.previous
/*
- * copy_user_generic - memory copy with exception handling.
+ * copy_user_generic_unrolled - memory copy with exception handling.
+ * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
*
* Input:
* rdi destination
* rsi source
* rdx count
+ * ecx zero flag -- if true zero destination on error
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
- .globl copy_user_generic
- .p2align 4
-copy_user_generic:
- .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
- .byte 0x66,0x90
-1:
- .section .altinstr_replacement,"ax"
-2: .byte 0xe9 /* near jump with 32bit immediate */
- .long copy_user_generic_c-1b /* offset */
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad copy_user_generic
- .quad 2b
- .byte X86_FEATURE_REP_GOOD
- .byte 5
- .byte 5
- .previous
-.Lcug:
+ENTRY(copy_user_generic_unrolled)
+ CFI_STARTPROC
pushq %rbx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbx, 0
+ pushq %rcx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rcx, 0
xorl %eax,%eax /*zero for the exception handler */
#ifdef FIX_ALIGNMENT
@@ -168,9 +182,16 @@ copy_user_generic:
decl %ecx
jnz .Lloop_1
+ CFI_REMEMBER_STATE
.Lende:
+ popq %rcx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rcx
popq %rbx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rbx
ret
+ CFI_RESTORE_STATE
#ifdef FIX_ALIGNMENT
/* align destination */
@@ -252,6 +273,8 @@ copy_user_generic:
addl %ecx,%edx
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
+ cmpl $0,(%rsp)
+ jz .Le_zero
movq %rdx,%rcx
.Le_byte:
xorl %eax,%eax
@@ -261,6 +284,9 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
+ CFI_ENDPROC
+ENDPROC(copy_user_generic)
+
/* Some CPUs run faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
@@ -270,6 +296,7 @@ copy_user_generic:
/* rdi destination
* rsi source
* rdx count
+ * ecx zero flag
*
* Output:
* eax uncopied bytes or 0 if successfull.
@@ -280,22 +307,48 @@ copy_user_generic:
* And more would be dangerous because both Intel and AMD have
* errata with rep movsq > 4GB. If someone feels the need to fix
* this please consider this.
- */
-copy_user_generic_c:
+ */
+ENTRY(copy_user_generic_string)
+ CFI_STARTPROC
+ movl %ecx,%r8d /* save zero flag */
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
+ jz 10f
1: rep
movsq
movl %edx,%ecx
2: rep
movsb
-4: movl %ecx,%eax
+9: movl %ecx,%eax
ret
-3: lea (%rdx,%rcx,8),%rax
+
+ /* multiple of 8 byte */
+10: rep
+ movsq
+ xor %eax,%eax
ret
+ /* exception handling */
+3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
+ jmp 6f
+5: movl %ecx,%eax /* exception on byte loop */
+ /* eax: left over bytes */
+6: testl %r8d,%r8d /* zero flag set? */
+ jz 7f
+ movl %eax,%ecx /* initialize x86 loop counter */
+ push %rax
+ xorl %eax,%eax
+8: rep
+ stosb /* zero the rest */
+11: pop %rax
+7: ret
+ CFI_ENDPROC
+END(copy_user_generic_c)
+
.section __ex_table,"a"
.quad 1b,3b
- .quad 2b,4b
+ .quad 2b,5b
+ .quad 8b,11b
+ .quad 10b,3b
.previous
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896..f0dba36578e 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
- #include <linux/linkage.h>
- #include <asm/errno.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/errno.h>
/*
* Checksum copy with exception handling.
@@ -53,19 +54,24 @@
.endm
- .globl csum_partial_copy_generic
- .p2align 4
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+ CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore
.Lignore:
subq $7*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
+ CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
+ CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
+ CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
+ CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
+ CFI_REL_OFFSET rbp, 6*8
movq %r8,(%rsp)
movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
addl %ebx,%eax
adcl %r9d,%eax /* carry */
+ CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
+ CFI_RESTORE rbx
movq 3*8(%rsp),%r12
+ CFI_RESTORE r12
movq 4*8(%rsp),%r14
+ CFI_RESTORE r14
movq 5*8(%rsp),%r13
+ CFI_RESTORE r13
movq 6*8(%rsp),%rbp
+ CFI_RESTORE rbp
addq $7*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -7*8
ret
+ CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
+ CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a..5448876261f 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
*/
#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.text
- .p2align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
cmpq threadinfo_addr_limit(%r8),%rcx
jae bad_get_user
1: movzb (%rcx),%edx
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__get_user_1)
- .p2align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $1,%rcx
jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
ret
20: decq %rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_2)
- .p2align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $3,%rcx
jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
ret
30: subq $3,%rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_4)
- .p2align 4
-.globl __get_user_8
-__get_user_8:
+ENTRY(__get_user_8)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $7,%rcx
jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
ret
40: subq $7,%rcx
jmp bad_get_user
+ CFI_ENDPROC
+ENDPROC(__get_user_8)
bad_get_user:
+ CFI_STARTPROC
xorl %edx,%edx
movq $(-EFAULT),%rax
ret
+ CFI_ENDPROC
+END(bad_get_user)
.section __ex_table,"a"
.quad 1b,bad_get_user
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea0..05a95e713da 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* override generic version in lib/iomap_copy.c
*/
- .globl __iowrite32_copy
- .p2align 4
-__iowrite32_copy:
+ENTRY(__iowrite32_copy)
+ CFI_STARTPROC
movl %edx,%ecx
rep movsd
ret
+ CFI_ENDPROC
+ENDPROC(__iowrite32_copy)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b555..967b22fa7d0 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
/* Copyright 2002 Andi Kleen */
- #include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
/*
* memcpy - Copy a memory block.
*
@@ -13,12 +17,26 @@
* rax original destination
*/
- .globl __memcpy
- .globl memcpy
- .p2align 4
-__memcpy:
-memcpy:
+ ALIGN
+memcpy_c:
+ CFI_STARTPROC
+ movq %rdi,%rax
+ movl %edx,%ecx
+ shrl $3,%ecx
+ andl $7,%edx
+ rep movsq
+ movl %edx,%ecx
+ rep movsb
+ ret
+ CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ CFI_STARTPROC
pushq %rbx
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbx, 0
movq %rdi,%rax
movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
.Lende:
popq %rbx
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rbx
ret
.Lfinal:
+ CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad memcpy
- .quad memcpy_c
- .byte X86_FEATURE_REP_GOOD
- .byte .Lfinal-memcpy
- .byte memcpy_c_end-memcpy_c
- .previous
-
- .section .altinstr_replacement,"ax"
- /* rdi destination
- * rsi source
- * rdx count
- */
-memcpy_c:
- movq %rdi,%rax
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- rep
- movsq
- movl %edx,%ecx
- rep
- movsb
- ret
-memcpy_c_end:
+ .quad memcpy
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lfinal - memcpy
+ .byte 2b - 1b
.previous
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de..09ed1f6b0ea 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* ISO C memset - set a memory block to a byte value.
*
@@ -8,11 +13,29 @@
*
* rax original destination
*/
- .globl __memset
- .globl memset
- .p2align 4
-memset:
-__memset:
+ ALIGN
+memset_c:
+ CFI_STARTPROC
+ movq %rdi,%r9
+ movl %edx,%r8d
+ andl $7,%r8d
+ movl %edx,%ecx
+ shrl $3,%ecx
+ /* expand byte value */
+ movzbl %sil,%esi
+ movabs $0x0101010101010101,%rax
+ mulq %rsi /* with rax, clobbers rdx */
+ rep stosq
+ movl %r8d,%ecx
+ rep stosb
+ movq %r9,%rax
+ ret
+ CFI_ENDPROC
+ENDPROC(memset_c)
+
+ENTRY(memset)
+ENTRY(__memset)
+ CFI_STARTPROC
movq %rdi,%r10
movq %rdx,%r11
@@ -25,6 +48,7 @@ __memset:
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
+ CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
movq %r10,%rax
ret
+ CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%r11
jbe .Lhandle_7
@@ -84,42 +109,26 @@ __memset:
addq %r8,%rdi
subq %r8,%r11
jmp .Lafter_bad_alignment
+.Lfinal:
+ CFI_ENDPROC
+ENDPROC(memset)
+ENDPROC(__memset)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (memset_c - memset) - (2f - 1b) /* offset */
+2:
+ .previous
.section .altinstructions,"a"
.align 8
- .quad memset
- .quad memset_c
- .byte X86_FEATURE_REP_GOOD
- .byte memset_c_end-memset_c
- .byte memset_c_end-memset_c
- .previous
-
- .section .altinstr_replacement,"ax"
- /* rdi destination
- * rsi value
- * rdx count
- */
-memset_c:
- movq %rdi,%r9
- movl %edx,%r8d
- andl $7,%r8d
- movl %edx,%ecx
- shrl $3,%ecx
- /* expand byte value */
- movzbl %sil,%esi
- movabs $0x0101010101010101,%rax
- mulq %rsi /* with rax, clobbers rdx */
- rep
- stosq
- movl %r8d,%ecx
- rep
- stosb
- movq %r9,%rax
- ret
-memset_c_end:
+ .quad memset
+ .quad 1b
+ .byte X86_FEATURE_REP_GOOD
+ .byte .Lfinal - memset
+ .byte 2b - 1b
.previous
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2..4989f5a8fa9 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
*/
#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.text
- .p2align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
cmpq threadinfo_addr_limit(%r8),%rcx
jae bad_put_user
1: movb %dl,(%rcx)
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__put_user_1)
- .p2align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $1,%rcx
jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
ret
20: decq %rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_2)
- .p2align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $3,%rcx
jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
ret
30: subq $3,%rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_4)
- .p2align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
+ CFI_STARTPROC
GET_THREAD_INFO(%r8)
addq $7,%rcx
jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
ret
40: subq $7,%rcx
jmp bad_put_user
+ CFI_ENDPROC
+ENDPROC(__put_user_8)
bad_put_user:
+ CFI_STARTPROC
movq $(-EFAULT),%rax
ret
+ CFI_ENDPROC
+END(bad_put_user)
.section __ex_table,"a"
.quad 1b,bad_put_user
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
new file mode 100644
index 00000000000..0cde1f80731
--- /dev/null
+++ b/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@
+/* Slow paths of read/write spinlocks. */
+
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/dwarf2.h>
+
+/* rdi: pointer to rwlock_t */
+ENTRY(__write_lock_failed)
+ CFI_STARTPROC
+ LOCK_PREFIX
+ addl $RW_LOCK_BIAS,(%rdi)
+1: rep
+ nop
+ cmpl $RW_LOCK_BIAS,(%rdi)
+ jne 1b
+ LOCK_PREFIX
+ subl $RW_LOCK_BIAS,(%rdi)
+ jnz __write_lock_failed
+ ret
+ CFI_ENDPROC
+END(__write_lock_failed)
+
+/* rdi: pointer to rwlock_t */
+ENTRY(__read_lock_failed)
+ CFI_STARTPROC
+ LOCK_PREFIX
+ incl (%rdi)
+1: rep
+ nop
+ cmpl $1,(%rdi)
+ js 1b
+ LOCK_PREFIX
+ decl (%rdi)
+ js __read_lock_failed
+ ret
+ CFI_ENDPROC
+END(__read_lock_failed)
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index 332ea5dff91..0025535cac8 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -1,10 +1,9 @@
- /*
- * Save registers before calling assembly functions. This avoids
- * disturbance of register allocation in some inline assembly constructs.
- * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
- * Subject to the GNU public license, v.2. No warranty of any kind.
- * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $
- */
+/*
+ * Save registers before calling assembly functions. This avoids
+ * disturbance of register allocation in some inline assembly constructs.
+ * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
#include <linux/config.h>
#include <linux/linkage.h>
@@ -67,33 +66,3 @@ restore_norax:
RESTORE_ARGS 1
ret
CFI_ENDPROC
-
-#ifdef CONFIG_SMP
-/* Support for read/write spinlocks. */
- .text
-/* rax: pointer to rwlock_t */
-ENTRY(__write_lock_failed)
- lock
- addl $RW_LOCK_BIAS,(%rax)
-1: rep
- nop
- cmpl $RW_LOCK_BIAS,(%rax)
- jne 1b
- lock
- subl $RW_LOCK_BIAS,(%rax)
- jnz __write_lock_failed
- ret
-
-/* rax: pointer to rwlock_t */
-ENTRY(__read_lock_failed)
- lock
- incl (%rax)
-1: rep
- nop
- cmpl $1,(%rax)
- js 1b
- lock
- decl (%rax)
- js __read_lock_failed
- ret
-#endif