diff options
Diffstat (limited to 'arch/sparc64')
-rw-r--r-- | arch/sparc64/kernel/auxio.c | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/entry.S | 116 | ||||
-rw-r--r-- | arch/sparc64/kernel/irq.c | 171 | ||||
-rw-r--r-- | arch/sparc64/kernel/semaphore.c | 12 | ||||
-rw-r--r-- | arch/sparc64/kernel/sparc64_ksyms.c | 1 | ||||
-rw-r--r-- | arch/sparc64/kernel/trampoline.S | 3 | ||||
-rw-r--r-- | arch/sparc64/lib/U1memcpy.S | 103 | ||||
-rw-r--r-- | arch/sparc64/lib/VISsave.S | 15 | ||||
-rw-r--r-- | arch/sparc64/lib/atomic.S | 42 | ||||
-rw-r--r-- | arch/sparc64/lib/bitops.S | 31 | ||||
-rw-r--r-- | arch/sparc64/lib/debuglocks.c | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/dec_and_lock.S | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/rwsem.S | 15 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 6 | ||||
-rw-r--r-- | arch/sparc64/mm/ultra.S | 3 |
15 files changed, 201 insertions, 331 deletions
diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c index a0716ccc2f4..8852c20c8d9 100644 --- a/arch/sparc64/kernel/auxio.c +++ b/arch/sparc64/kernel/auxio.c @@ -16,7 +16,7 @@ #include <asm/ebus.h> #include <asm/auxio.h> -/* This cannot be static, as it is referenced in entry.S */ +/* This cannot be static, as it is referenced in irq.c */ void __iomem *auxio_register = NULL; enum auxio_type { diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a47f2d0b1a2..eee516a71c1 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -271,8 +271,9 @@ cplus_fptrap_insn_1: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 + membar #Sync b,pt %xcc, fpdis_exit - membar #Sync + nop 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -301,8 +302,9 @@ cplus_fptrap_insn_2: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 + membar #Sync ba,pt %xcc, fpdis_exit - membar #Sync + nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 @@ -699,116 +701,6 @@ utrap_ill: ba,pt %xcc, rtrap clr %l6 -#ifdef CONFIG_BLK_DEV_FD - .globl floppy_hardint -floppy_hardint: - wr %g0, (1 << 11), %clear_softint - sethi %hi(doing_pdma), %g1 - ld [%g1 + %lo(doing_pdma)], %g2 - brz,pn %g2, floppy_dosoftint - sethi %hi(fdc_status), %g3 - ldx [%g3 + %lo(fdc_status)], %g3 - sethi %hi(pdma_vaddr), %g5 - ldx [%g5 + %lo(pdma_vaddr)], %g4 - sethi %hi(pdma_size), %g5 - ldx [%g5 + %lo(pdma_size)], %g5 - -next_byte: - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - andcc %g7, 0x80, %g0 - be,pn %icc, floppy_fifo_emptied - andcc %g7, 0x20, %g0 - be,pn %icc, floppy_overrun - andcc %g7, 0x40, %g0 - be,pn %icc, floppy_write - sub %g5, 1, %g5 - - inc %g3 - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - dec %g3 - orcc %g0, %g5, %g0 - stb %g7, [%g4] - bne,pn %xcc, next_byte - add %g4, 1, %g4 - - b,pt %xcc, floppy_tdone - nop - -floppy_write: - ldub [%g4], %g7 - orcc %g0, %g5, %g0 - inc %g3 - stba %g7, [%g3] ASI_PHYS_BYPASS_EC_E - dec %g3 - bne,pn %xcc, next_byte - add %g4, 1, %g4 - -floppy_tdone: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(auxio_register), %g1 - ldx [%g1 + %lo(auxio_register)], %g7 - lduba [%g7] ASI_PHYS_BYPASS_EC_E, %g5 - or %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - andn %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - - nop; nop; nop; nop; nop; nop; - nop; nop; nop; nop; nop; nop; - - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - sethi %hi(doing_pdma), %g1 - b,pt %xcc, floppy_dosoftint - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_fifo_emptied: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(irq_action), %g1 - or %g1, %lo(irq_action), %g1 - ldx [%g1 + (11 << 3)], %g3 ! irqaction[floppy_irq] - ldx [%g3 + 0x08], %g4 ! action->flags>>48==ino - sethi %hi(ivector_table), %g3 - srlx %g4, 48, %g4 - or %g3, %lo(ivector_table), %g3 - sllx %g4, 5, %g4 - ldx [%g3 + %g4], %g4 ! &ivector_table[ino] - ldx [%g4 + 0x10], %g4 ! bucket->iclr - stwa %g0, [%g4] ASI_PHYS_BYPASS_EC_E ! ICLR_IDLE - membar #Sync ! probably not needed... - retry - -floppy_overrun: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(doing_pdma), %g1 - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_dosoftint: - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - - mov 11, %o0 - mov 0, %o1 - call sparc_floppy_irq - add %sp, PTREGS_OFF, %o2 - - b,pt %xcc, rtrap_irq - nop - -#endif /* CONFIG_BLK_DEV_FD */ - /* XXX Here is stuff we still need to write... -DaveM XXX */ .globl netbsd_syscall netbsd_syscall: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 4dcb8af9409..42471257730 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -37,6 +37,7 @@ #include <asm/uaccess.h> #include <asm/cache.h> #include <asm/cpudata.h> +#include <asm/auxio.h> #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs) } #ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs); +extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);; -void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) -{ - struct irqaction *action = *(irq + irq_action); - struct ino_bucket *bucket; - int cpu = smp_processor_id(); - - irq_enter(); - kstat_this_cpu.irqs[irq]++; - - *(irq_work(cpu, irq)) = 0; - bucket = get_ino_in_irqaction(action) + ivector_table; - - bucket->flags |= IBF_INPROGRESS; - - floppy_interrupt(irq, dev_cookie, regs); - upa_writel(ICLR_IDLE, bucket->iclr); - - bucket->flags &= ~IBF_INPROGRESS; - - irq_exit(); -} -#endif - -/* The following assumes that the branch lies before the place we - * are branching to. This is the case for a trap vector... - * You have been warned. - */ -#define SPARC_BRANCH(dest_addr, inst_addr) \ - (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff)) - -#define SPARC_NOP (0x01000000) +/* XXX No easy way to include asm/floppy.h XXX */ +extern unsigned char *pdma_vaddr; +extern unsigned long pdma_size; +extern volatile int doing_pdma; +extern unsigned long fdc_status; -static void install_fast_irq(unsigned int cpu_irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *)) +irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) { - extern unsigned long sparc64_ttable_tl0; - unsigned long ttent = (unsigned long) &sparc64_ttable_tl0; - unsigned int *insns; - - ttent += 0x820; - ttent += (cpu_irq - 1) << 5; - insns = (unsigned int *) ttent; - insns[0] = SPARC_BRANCH(((unsigned long) handler), - ((unsigned long)&insns[0])); - insns[1] = SPARC_NOP; - __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent)); -} - -int request_fast_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char *name, void *dev_id) -{ - struct irqaction *action; - struct ino_bucket *bucket = __bucket(irq); - unsigned long flags; - - /* No pil0 dummy buckets allowed here. */ - if (bucket < &ivector_table[0] || - bucket >= &ivector_table[NUM_IVECS]) { - unsigned int *caller; - - __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); - printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt " - "from %p, irq %08x.\n", caller, irq); - return -EINVAL; - } - - if (!handler) - return -EINVAL; + if (likely(doing_pdma)) { + void __iomem *stat = (void __iomem *) fdc_status; + unsigned char *vaddr = pdma_vaddr; + unsigned long size = pdma_size; + u8 val; + + while (size) { + val = readb(stat); + if (unlikely(!(val & 0x80))) { + pdma_vaddr = vaddr; + pdma_size = size; + return IRQ_HANDLED; + } + if (unlikely(!(val & 0x20))) { + pdma_vaddr = vaddr; + pdma_size = size; + doing_pdma = 0; + goto main_interrupt; + } + if (val & 0x40) { + /* read */ + *vaddr++ = readb(stat + 1); + } else { + unsigned char data = *vaddr++; - if ((bucket->pil == 0) || (bucket->pil == 14)) { - printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n"); - return -EBUSY; - } + /* write */ + writeb(data, stat + 1); + } + size--; + } - spin_lock_irqsave(&irq_action_lock, flags); + pdma_vaddr = vaddr; + pdma_size = size; - action = *(bucket->pil + irq_action); - if (action) { - if (action->flags & SA_SHIRQ) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & SA_SHIRQ) - panic("Trying to register fast irq as shared.\n"); - printk("request_fast_irq: Trying to register yet already owned.\n"); - spin_unlock_irqrestore(&irq_action_lock, flags); - return -EBUSY; - } + /* Send Terminal Count pulse to floppy controller. */ + val = readb(auxio_register); + val |= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); + val &= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); - /* - * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we - * support smp intr affinity in this path. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed " - "using kmalloc\n", bucket->pil, name); - } - if (action == NULL) - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), - GFP_ATOMIC); - if (!action) { - spin_unlock_irqrestore(&irq_action_lock, flags); - return -ENOMEM; + doing_pdma = 0; } - install_fast_irq(bucket->pil, handler); - bucket->irq_info = action; - bucket->flags |= IBF_ACTIVE; - - action->handler = handler; - action->flags = irqflags; - action->dev_id = NULL; - action->name = name; - action->next = NULL; - put_ino_in_irqaction(action, irq); - put_smpaff_in_irqaction(action, CPU_MASK_NONE); - - *(bucket->pil + irq_action) = action; - enable_irq(irq); - - spin_unlock_irqrestore(&irq_action_lock, flags); - -#ifdef CONFIG_SMP - distribute_irqs(); -#endif - return 0; +main_interrupt: + return floppy_interrupt(irq, dev_cookie, regs); } +EXPORT_SYMBOL(sparc_floppy_irq); +#endif /* We really don't need these at all on the Sparc. We only have * stubs here because they are exported to modules. diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 63496c43fe1..a809e63f03e 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr) " add %1, %4, %1\n" " cas [%3], %0, %1\n" " cmp %0, %1\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%icc, 1b\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (&sem->count), "r" (incr), "m" (sem->count) : "cc"); @@ -71,8 +72,9 @@ void up(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " addcc %%g7, 1, %%g0\n" +" membar #StoreLoad | #StoreStore\n" " ble,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %2, %%g1\n" diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index e78cc53594f..56cd96f4a5c 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range); EXPORT_SYMBOL(mostek_lock); EXPORT_SYMBOL(mstk48t02_regs); -EXPORT_SYMBOL(request_fast_irq); #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(auxio_set_led); EXPORT_SYMBOL(auxio_set_lte); diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 2c8f9344b4e..3a145fc39cf 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -98,8 +98,9 @@ startup_continue: sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + membar #StoreLoad | #StoreStore brnz,pn %g1, 1b - membar #StoreLoad | #StoreStore + nop sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index da9b520c718..bafd2fc07ac 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S @@ -87,14 +87,17 @@ #define LOOP_CHUNK3(src, dest, len, branch_dest) \ MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ EX_ST(STORE_BLK(%fsrc, %dest)); \ - add %dest, 0x40, %dest; + add %dest, 0x40, %dest; \ + DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ EX_ST(STORE_BLK(%fsrc, %dest)); \ add %dest, 0x40, %dest; \ - ba,pt %xcc, target; + ba,pt %xcc, target; \ + nop; #define FINISH_VISCHUNK(dest, f0, f1, left) \ subcc %left, 8, %left;\ @@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_JUMP(o0, f48, 40f) membar #Sync + STORE_JUMP(o0, f48, 40f) 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_JUMP(o0, f48, 48f) membar #Sync + STORE_JUMP(o0, f48, 48f) 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_JUMP(o0, f48, 56f) membar #Sync + STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_JUMP(o0, f48, 41f) membar #Sync + STORE_JUMP(o0, f48, 41f) 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - STORE_JUMP(o0, f48, 49f) membar #Sync + STORE_JUMP(o0, f48, 49f) 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_JUMP(o0, f48, 57f) membar #Sync + STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_JUMP(o0, f48, 42f) membar #Sync + STORE_JUMP(o0, f48, 42f) 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_JUMP(o0, f48, 50f) membar #Sync + STORE_JUMP(o0, f48, 50f) 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_JUMP(o0, f48, 58f) membar #Sync + STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_JUMP(o0, f48, 43f) membar #Sync + STORE_JUMP(o0, f48, 43f) 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_JUMP(o0, f48, 51f) membar #Sync + STORE_JUMP(o0, f48, 51f) 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_JUMP(o0, f48, 59f) membar #Sync + STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_JUMP(o0, f48, 44f) membar #Sync + STORE_JUMP(o0, f48, 44f) 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_JUMP(o0, f48, 52f) membar #Sync + STORE_JUMP(o0, f48, 52f) 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_JUMP(o0, f48, 60f) membar #Sync + STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_JUMP(o0, f48, 45f) membar #Sync + STORE_JUMP(o0, f48, 45f) 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_JUMP(o0, f48, 53f) membar #Sync + STORE_JUMP(o0, f48, 53f) 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_JUMP(o0, f48, 61f) membar #Sync + STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_JUMP(o0, f48, 46f) membar #Sync + STORE_JUMP(o0, f48, 46f) 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_JUMP(o0, f48, 54f) membar #Sync + STORE_JUMP(o0, f48, 54f) 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - STORE_JUMP(o0, f48, 62f) membar #Sync + STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_JUMP(o0, f48, 47f) membar #Sync + STORE_JUMP(o0, f48, 47f) 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_JUMP(o0, f48, 55f) membar #Sync + STORE_JUMP(o0, f48, 55f) 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_JUMP(o0, f48, 63f) membar #Sync + STORE_JUMP(o0, f48, 63f) 40: FINISH_VISCHUNK(o0, f0, f2, g3) 41: FINISH_VISCHUNK(o0, f2, f4, g3) diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S index 65e328d600a..4e18989bd60 100644 --- a/arch/sparc64/lib/VISsave.S +++ b/arch/sparc64/lib/VISsave.S @@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stda %f48, [%g3 + %g1] ASI_BLK_P 5: membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + + .align 32 +80: jmpl %g7 + %g0, %g0 nop 6: ldub [%g3 + TI_FPSAVED], %o5 @@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stda %f32, [%g2 + %g1] ASI_BLK_P stda %f48, [%g3 + %g1] ASI_BLK_P membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + .align 32 +80: jmpl %g7 + %g0, %g0 nop .align 32 @@ -126,6 +133,10 @@ VISenterhalf: stda %f0, [%g2 + %g1] ASI_BLK_P stda %f16, [%g3 + %g1] ASI_BLK_P membar #Sync + ba,pt %xcc, 4f + nop + + .align 32 4: and %o5, FPRS_DU, %o5 jmpl %g7 + %g0, %g0 wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index e528b8d1a3e..faf87c31598 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S @@ -7,18 +7,6 @@ #include <linux/config.h> #include <asm/asi.h> - /* On SMP we need to use memory barriers to ensure - * correct memory operation ordering, nop these out - * for uniprocessor. - */ -#ifdef CONFIG_SMP -#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore -#else -#define ATOMIC_PRE_BARRIER nop -#define ATOMIC_POST_BARRIER nop -#endif - .text /* Two versions of the atomic routines, one that @@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ nop .size atomic_sub, .-atomic_sub + /* On SMP we need to use memory barriers to ensure + * correct memory operation ordering, nop these out + * for uniprocessor. + */ +#ifdef CONFIG_SMP + +#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; +#define ATOMIC_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop +#else +#define ATOMIC_PRE_BARRIER +#define ATOMIC_POST_BARRIER +#endif + .globl atomic_add_ret .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ @@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b add %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_add_ret, .-atomic_add_ret .globl atomic_sub_ret @@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b sub %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_sub_ret, .-atomic_sub_ret .globl atomic64_add @@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b add %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_add_ret, .-atomic64_add_ret .globl atomic64_sub_ret @@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b sub %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_sub_ret, .-atomic64_sub_ret diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index 886dcd2b376..31afbfe6c1e 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S @@ -7,20 +7,26 @@ #include <linux/config.h> #include <asm/asi.h> + .text + /* On SMP we need to use memory barriers to ensure * correct memory operation ordering, nop these out * for uniprocessor. */ + #ifdef CONFIG_SMP #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore +#define BITOP_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop #else -#define BITOP_PRE_BARRIER nop -#define BITOP_POST_BARRIER nop +#define BITOP_PRE_BARRIER +#define BITOP_POST_BARRIER #endif - .text - .globl test_and_set_bit .type test_and_set_bit,#function test_and_set_bit: /* %o0=nr, %o1=addr */ @@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_set_bit, .-test_and_set_bit .globl test_and_clear_bit @@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_clear_bit, .-test_and_clear_bit .globl test_and_change_bit @@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_change_bit, .-test_and_change_bit .globl set_bit diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index c421e0c6532..f03344cf784 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -252,8 +252,9 @@ wlock_again: " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" -" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); @@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str) " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" -" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index 7e6fdaebedb..8ee288dd0af 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S @@ -48,8 +48,9 @@ start_to_zero: #endif to_zero: ldstub [%o1], %g3 + membar #StoreLoad | #StoreStore brnz,pn %g3, spin_on_lock - membar #StoreLoad | #StoreStore + nop loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ cmp %g2, %g7 @@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ nop spin_on_lock: ldub [%o1], %g3 + membar #LoadLoad brnz,pt %g3, spin_on_lock - membar #LoadLoad + nop ba,pt %xcc, to_zero nop nop diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S index 174ff7b9164..75f0e6b951d 100644 --- a/arch/sparc64/lib/rwsem.S +++ b/arch/sparc64/lib/rwsem.S @@ -17,8 +17,9 @@ __down_read: bne,pn %icc, 1b add %g7, 1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -57,8 +58,9 @@ __down_write: cmp %g3, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bne,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: @@ -97,8 +99,9 @@ __up_read: cmp %g1, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 @@ -126,8 +129,9 @@ __up_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -151,8 +155,9 @@ __downgrade_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9c5222075da..8fc413cb6ac 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) "or %%g1, %0, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore" + " nop" : /* no outputs */ : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) : "g1", "g7"); @@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c " andn %%g7, %1, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore\n" + " nop\n" "2:" : /* no outputs */ : "r" (cpu), "r" (mask), "r" (&page->flags), diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 7a093432101..7a2431d3abc 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */ andn %o3, 1, %o3 stxa %g0, [%o3] ASI_IMMU_DEMAP 2: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync brnz,pt %o1, 1b - membar #Sync + nop stxa %g2, [%o4] ASI_DMMU flush %g6 wrpr %g0, 0, %tl |