Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/Makefile           |   3
-rw-r--r--  arch/powerpc/lib/copypage_64.S      | 198
-rw-r--r--  arch/powerpc/lib/copyuser_64.S      |  17
-rw-r--r--  arch/powerpc/lib/dma-noncoherent.c  |  27
-rw-r--r--  arch/powerpc/lib/memcpy_64.S        |  16
-rw-r--r--  arch/powerpc/lib/rheap.c            |   1
6 files changed, 139 insertions(+), 123 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d69912c07ce..8db35278a4b 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
endif
+CFLAGS_REMOVE_code-patching.o = -pg
+CFLAGS_REMOVE_feature-fixups.o = -pg
+
obj-y := string.o alloc.o \
checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index f9837f44ac0..75f3267fdc3 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -8,112 +8,100 @@
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
_GLOBAL(copy_4K_page)
- std r31,-8(1)
- std r30,-16(1)
- std r29,-24(1)
- std r28,-32(1)
- std r27,-40(1)
- std r26,-48(1)
- std r25,-56(1)
- std r24,-64(1)
- std r23,-72(1)
- std r22,-80(1)
- std r21,-88(1)
- std r20,-96(1)
- li r5,4096/32 - 1
+ li r5,4096 /* 4K page size */
+BEGIN_FTR_SECTION
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
+ lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */
+ li r9,0
+ srd r8,r5,r11
+
+ mtctr r8
+setup:
+ dcbt r9,r4
+ dcbz r9,r3
+ add r9,r9,r12
+ bdnz setup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8
- li r12,5
-0: addi r5,r5,-24
- mtctr r12
- ld r22,640(4)
- ld r21,512(4)
- ld r20,384(4)
- ld r11,256(4)
- ld r9,128(4)
- ld r7,0(4)
- ld r25,648(4)
- ld r24,520(4)
- ld r23,392(4)
- ld r10,264(4)
- ld r8,136(4)
- ldu r6,8(4)
- cmpwi r5,24
-1: std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- ld r28,648(4)
- ld r27,520(4)
- ld r26,392(4)
- ld r31,264(4)
- ld r30,136(4)
- ld r29,8(4)
- std r25,656(3)
- std r24,528(3)
- std r23,400(3)
- std r10,272(3)
- std r8,144(3)
- std r6,16(3)
- ld r22,656(4)
- ld r21,528(4)
- ld r20,400(4)
- ld r11,272(4)
- ld r9,144(4)
- ld r7,16(4)
- std r28,664(3)
- std r27,536(3)
- std r26,408(3)
- std r31,280(3)
- std r30,152(3)
- stdu r29,24(3)
- ld r25,664(4)
- ld r24,536(4)
- ld r23,408(4)
- ld r10,280(4)
- ld r8,152(4)
- ldu r6,24(4)
+ srdi r8,r5,7 /* page is copied in 128 byte strides */
+ addi r8,r8,-1 /* one stride copied outside loop */
+
+ mtctr r8
+
+ ld r5,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ldu r8,24(r4)
+1: std r5,8(r3)
+ ld r9,8(r4)
+ std r6,16(r3)
+ ld r10,16(r4)
+ std r7,24(r3)
+ ld r11,24(r4)
+ std r8,32(r3)
+ ld r12,32(r4)
+ std r9,40(r3)
+ ld r5,40(r4)
+ std r10,48(r3)
+ ld r6,48(r4)
+ std r11,56(r3)
+ ld r7,56(r4)
+ std r12,64(r3)
+ ld r8,64(r4)
+ std r5,72(r3)
+ ld r9,72(r4)
+ std r6,80(r3)
+ ld r10,80(r4)
+ std r7,88(r3)
+ ld r11,88(r4)
+ std r8,96(r3)
+ ld r12,96(r4)
+ std r9,104(r3)
+ ld r5,104(r4)
+ std r10,112(r3)
+ ld r6,112(r4)
+ std r11,120(r3)
+ ld r7,120(r4)
+ stdu r12,128(r3)
+ ldu r8,128(r4)
bdnz 1b
- std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- addi r4,r4,640
- addi r3,r3,648
- bge 0b
- mtctr r5
- ld r7,0(4)
- ld r8,8(4)
- ldu r9,16(4)
-3: ld r10,8(4)
- std r7,8(3)
- ld r7,16(4)
- std r8,16(3)
- ld r8,24(4)
- std r9,24(3)
- ldu r9,32(4)
- stdu r10,32(3)
- bdnz 3b
-4: ld r10,8(4)
- std r7,8(3)
- std r8,16(3)
- std r9,24(3)
- std r10,32(3)
-9: ld r20,-96(1)
- ld r21,-88(1)
- ld r22,-80(1)
- ld r23,-72(1)
- ld r24,-64(1)
- ld r25,-56(1)
- ld r26,-48(1)
- ld r27,-40(1)
- ld r28,-32(1)
- ld r29,-24(1)
- ld r30,-16(1)
- ld r31,-8(1)
+
+ std r5,8(r3)
+ ld r9,8(r4)
+ std r6,16(r3)
+ ld r10,16(r4)
+ std r7,24(r3)
+ ld r11,24(r4)
+ std r8,32(r3)
+ ld r12,32(r4)
+ std r9,40(r3)
+ ld r5,40(r4)
+ std r10,48(r3)
+ ld r6,48(r4)
+ std r11,56(r3)
+ ld r7,56(r4)
+ std r12,64(r3)
+ ld r8,64(r4)
+ std r5,72(r3)
+ ld r9,72(r4)
+ std r6,80(r3)
+ ld r10,80(r4)
+ std r7,88(r3)
+ ld r11,88(r4)
+ std r8,96(r3)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ std r11,120(r3)
+ std r12,128(r3)
blr
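
Note: the rewritten copy_4K_page above no longer spills a dozen nonvolatile GPRs to the stack. It copies the page in 128-byte strides using only volatile registers, keeping a window of loads in flight ahead of the stores, and on CPUs with CPU_FTR_CP_USE_DCBTZ it first walks the page with dcbt/dcbz to prefetch the source and pre-zero the destination cache lines. The C below is a minimal sketch of that stride structure only, not kernel code; copy_4k_page_sketch and the tmp[] buffer (standing in for the GPRs the asm cycles through) are illustrative names.

#include <stdint.h>
#include <stddef.h>

#define PAGE_4K 4096
#define STRIDE  128                     /* bytes per loop iteration, as in the asm */
#define WORDS   (STRIDE / sizeof(uint64_t))

static void copy_4k_page_sketch(void *dst, const void *src)
{
        uint64_t *d = dst;
        const uint64_t *s = src;
        uint64_t tmp[WORDS];            /* models the registers kept in flight */
        size_t i, w;

        for (i = 0; i < PAGE_4K / STRIDE; i++) {
                /* issue the loads for this stride first ... */
                for (w = 0; w < WORDS; w++)
                        tmp[w] = s[w];
                /* ... then drain the stores; the asm interleaves the two
                 * streams so each std is paired with the next ld */
                for (w = 0; w < WORDS; w++)
                        d[w] = tmp[w];
                s += WORDS;
                d += WORDS;
        }
}
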
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 25ec5378afa..70693a5c12a 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -26,11 +26,24 @@ _GLOBAL(__copy_tofrom_user)
andi. r6,r6,7
PPC_MTOCRF 0x01,r5
blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
- andi. r0,r4,7
addi r3,r3,-16
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
srdi r7,r5,4
20: ld r9,0(r4)
addi r4,r4,-8
@@ -138,7 +151,7 @@ _GLOBAL(__copy_tofrom_user)
PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
- cmpldi r1,r5,16
+ cmpldi cr1,r5,16
bf cr7*4+3,1f
35: lbz r0,0(r4)
81: stb r0,0(r3)
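
Note: BEGIN_FTR_SECTION/FTR_SECTION_ELSE/ALT_FTR_SECTION_END do not branch at run time; they record alternative instruction sequences that boot-time feature-fixup code selects between by patching the kernel text in place, based on the CPU feature mask (plausibly also why the Makefile hunk at the top strips -pg from code-patching.o and feature-fixups.o: the patching machinery itself should not be entered via mcount profiling). Here the net effect is that on a CPU with CPU_FTR_UNALIGNED_LD_STD set and CPU_FTR_CP_USE_DCBTZ clear (Power6, per the comment) the bne to .Ldst_unaligned becomes a nop and the unaligned-destination path is skipped. The C below is a rough model of the mechanism under stated assumptions; struct ftr_fixup and apply_feature_fixups_sketch are illustrative, not the kernel's actual fixup tables.

#include <stdint.h>

/* Hypothetical record for one feature section (the real tables are
 * emitted into a fixup section by the assembler macros). */
struct ftr_fixup {
        uint64_t mask;          /* feature bits the section tests        */
        uint64_t value;         /* required state of those bits          */
        uint32_t *start;        /* first instruction of the patch site   */
        uint32_t *end;          /* one past the last instruction         */
};

#define PPC_INST_NOP 0x60000000u        /* "ori r0,r0,0" */

static void apply_feature_fixups_sketch(uint64_t cpu_features,
                                        struct ftr_fixup *f, int count)
{
        for (int i = 0; i < count; i++, f++) {
                /* condition holds: keep the section as assembled */
                if ((cpu_features & f->mask) == f->value)
                        continue;
                /* condition fails: overwrite the section with nops
                 * (the real code may patch in an alternative sequence
                 * instead, and must flush the icache afterwards) */
                for (uint32_t *p = f->start; p < f->end; p++)
                        *p = PPC_INST_NOP;
        }
}
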
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
index 5d83907f659..b7dc4c19f58 100644
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ b/arch/powerpc/lib/dma-noncoherent.c
@@ -77,26 +77,26 @@ static DEFINE_SPINLOCK(consistent_lock);
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling vm_region_alloc().
 */
-struct vm_region {
+struct ppc_vm_region {
struct list_head vm_list;
unsigned long vm_start;
unsigned long vm_end;
};
-static struct vm_region consistent_head = {
+static struct ppc_vm_region consistent_head = {
.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
.vm_start = CONSISTENT_BASE,
.vm_end = CONSISTENT_END,
};
-static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
+static struct ppc_vm_region *
+ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
{
unsigned long addr = head->vm_start, end = head->vm_end - size;
unsigned long flags;
- struct vm_region *c, *new;
+ struct ppc_vm_region *c, *new;
- new = kmalloc(sizeof(struct vm_region), gfp);
+ new = kmalloc(sizeof(struct ppc_vm_region), gfp);
if (!new)
goto out;
@@ -130,9 +130,9 @@ vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
return NULL;
}
-static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
+static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
{
- struct vm_region *c;
+ struct ppc_vm_region *c;
list_for_each_entry(c, &head->vm_list, vm_list) {
if (c->vm_start == addr)
@@ -151,7 +151,7 @@ void *
__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
{
struct page *page;
- struct vm_region *c;
+ struct ppc_vm_region *c;
unsigned long order;
u64 mask = 0x00ffffff, limit; /* ISA default */
@@ -191,7 +191,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
/*
* Allocate a virtual address in the consistent mapping region.
*/
- c = vm_region_alloc(&consistent_head, size,
+ c = ppc_vm_region_alloc(&consistent_head, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
if (c) {
unsigned long vaddr = c->vm_start;
@@ -203,7 +203,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
/*
* Set the "dma handle"
*/
- *handle = page_to_bus(page);
+ *handle = page_to_phys(page);
do {
BUG_ON(!pte_none(*pte));
@@ -239,7 +239,7 @@ EXPORT_SYMBOL(__dma_alloc_coherent);
*/
void __dma_free_coherent(size_t size, void *vaddr)
{
- struct vm_region *c;
+ struct ppc_vm_region *c;
unsigned long flags, addr;
pte_t *ptep;
@@ -247,7 +247,7 @@ void __dma_free_coherent(size_t size, void *vaddr)
spin_lock_irqsave(&consistent_lock, flags);
- c = vm_region_find(&consistent_head, (unsigned long)vaddr);
+ c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
if (!c)
goto no_area;
@@ -320,7 +320,6 @@ static int __init dma_alloc_init(void)
ret = -ENOMEM;
break;
}
- WARN_ON(!pmd_none(*pmd));
pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
if (!pte) {
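
Note: the vm_region -> ppc_vm_region rename avoids a clash with the generic name while leaving the allocator's behaviour unchanged (the page_to_bus -> page_to_phys hunk likewise just sets the DMA handle to the page's physical address). The allocator hands out virtual ranges from the CONSISTENT_BASE..CONSISTENT_END window by first-fit search over a list of regions kept sorted by start address. The following is a condensed sketch of that search, reconstructed from the context lines with locking and the kmalloc failure path omitted; first_fit_sketch is an illustrative name.

#include <linux/list.h>

struct ppc_vm_region {
        struct list_head vm_list;
        unsigned long vm_start;
        unsigned long vm_end;
};

/* First fit: walk the sorted busy list and take the first gap that can
 * hold 'size' bytes.  Falling off the loop appends at the end, because
 * list_add_tail() before the list head inserts at the tail. */
static struct ppc_vm_region *
first_fit_sketch(struct ppc_vm_region *head, struct ppc_vm_region *new,
                 unsigned long size)
{
        unsigned long addr = head->vm_start;
        unsigned long end = head->vm_end - size;
        struct ppc_vm_region *c;

        list_for_each_entry(c, &head->vm_list, vm_list) {
                if ((addr + size) < addr)
                        return NULL;            /* address wrapped */
                if ((addr + size) <= c->vm_start)
                        goto found;             /* gap before 'c' fits */
                addr = c->vm_end;               /* try after 'c' */
                if (addr > end)
                        return NULL;            /* ran past the window */
        }
found:
        new->vm_start = addr;
        new->vm_end = addr + size;
        list_add_tail(&new->vm_list, &c->vm_list);
        return new;
}
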
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 3f131129d1c..fe2d34e5332 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -18,11 +18,23 @@ _GLOBAL(memcpy)
andi. r6,r6,7
dcbt 0,r4
blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+   cleared.
+   At the time of writing the only CPU that has this combination of bits
+   set is Power6. */
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
- andi. r0,r4,7
addi r3,r3,-16
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
srdi r7,r5,4
ld r9,0(r4)
addi r4,r4,-8
@@ -131,7 +143,7 @@ _GLOBAL(memcpy)
PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
- cmpldi r1,r5,16
+ cmpldi cr1,r5,16
bf cr7*4+3,1f
lbz r0,0(r4)
stb r0,0(r3)
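
Note: memcpy gets the same treatment as __copy_tofrom_user: with CPU_FTR_UNALIGNED_LD_STD set (and CPU_FTR_CP_USE_DCBTZ clear) both alignment checks are patched away, and every copy of 16 bytes or more takes the aligned ld/std path. The second hunk corrects the compare's first operand: cmpldi takes a condition-register field, so cr1 is the proper spelling; the old r1 merely happened to assemble to the same field number. The function below is a compilable sketch of the resulting branch structure only; pick_path, its enum, and cpu_has_unaligned_ld_std (standing in for the boot-time patching) are illustrative.

#include <stdint.h>
#include <stddef.h>

enum copy_path { SHORT_COPY, DST_UNALIGNED, SRC_UNALIGNED, ALIGNED_COPY };

static enum copy_path pick_path(uintptr_t dst, uintptr_t src, size_t n,
                                int cpu_has_unaligned_ld_std)
{
        if (n < 16)
                return SHORT_COPY;              /* blt cr1,.Lshort_copy */
        if (!cpu_has_unaligned_ld_std) {
                if (dst & 7)
                        return DST_UNALIGNED;   /* bne .Ldst_unaligned */
                if (src & 7)
                        return SRC_UNALIGNED;   /* bne .Lsrc_unaligned */
        }
        return ALIGNED_COPY;    /* 16 bytes per iteration via ld/std pairs */
}
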
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
index 29b2941cada..45907c1dae6 100644
--- a/arch/powerpc/lib/rheap.c
+++ b/arch/powerpc/lib/rheap.c
@@ -556,6 +556,7 @@ unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, co
be = blk->start + blk->size;
if (s >= bs && e <= be)
break;
+ blk = NULL;
}
if (blk == NULL)
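
Note: the one-line rheap fix addresses a not-found case in rh_alloc_fixed(). The search assigns each visited block to blk as it walks the free list, so after an unsuccessful full traversal blk still pointed at the last block examined and the blk == NULL test below the loop could never fire, handing back a bogus block instead of -ENOMEM. Resetting blk on every non-match restores the invariant that blk == NULL after the loop means "not found". The self-contained C below demonstrates the same pattern with a hypothetical node type; find_enclosing and struct node are illustrative, not rheap.c's types.

#include <stddef.h>

struct node {
        unsigned long start, size;
        struct node *next;
};

/* Find the node whose [start, start+size) range encloses [s, e).
 * 'found' is cleared on every non-match, so found == NULL after the
 * loop means "no such node" -- the invariant the rheap.c one-liner
 * restores. */
static struct node *find_enclosing(struct node *head,
                                   unsigned long s, unsigned long e)
{
        struct node *found = NULL;

        for (struct node *n = head; n != NULL; n = n->next) {
                found = n;
                if (s >= n->start && e <= n->start + n->size)
                        break;                  /* n encloses [s, e) */
                found = NULL;                   /* equivalent of "blk = NULL;" */
        }
        return found;                           /* NULL => caller reports -ENOMEM */
}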