diff options
-rw-r--r-- | arch/blackfin/Kconfig | 27 | ||||
-rw-r--r-- | arch/blackfin/lib/ins.S | 109 |
2 files changed, 136 insertions, 0 deletions
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index cc2add7e39e..ededf9319f3 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -622,6 +622,33 @@ config CPLB_SWITCH_TAB_L1 If enabled, the CPLB Switch Tables are linked into L1 data memory. (less latency) +comment "Speed Optimizations" +config BFIN_INS_LOWOVERHEAD + bool "ins[bwl] low overhead, higher interrupt latency" + default y + help + Reads on the Blackfin are speculative. In Blackfin terms, this means + they can be interrupted at any time (even after they have been issued + on to the external bus), and re-issued after the interrupt occurs. + For memory - this is not a big deal, since memory does not change if + it sees a read. + + If a FIFO is sitting on the end of the read, it will see two reads, + when the core only sees one since the FIFO receives both the read + which is cancelled (and not delivered to the core) and the one which + is re-issued (which is delivered to the core). + + To solve this, interrupts are turned off before reads occur to + I/O space. This option controls the trade-off between the overhead + and the latency of turning interrupts off during this time: + "n" turns interrupts off around every individual read + (higher overhead, but lower interrupt latency) + "y" turns interrupts off once around the whole read loop + (low overhead, but longer interrupt latency) + + The default behavior is to leave this set to on (type "Y"). If you are experiencing + interrupt latency issues, it is safe and OK to turn this off. + endmenu diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S index eba2343b1b5..d60554dce87 100644 --- a/arch/blackfin/lib/ins.S +++ b/arch/blackfin/lib/ins.S @@ -33,7 +33,28 @@ .align 2 +/* + * Reads on the Blackfin are speculative. In Blackfin terms, this means they + * can be interrupted at any time (even after they have been issued on to the + * external bus), and re-issued after the interrupt occurs. 
+ * + * If a FIFO is sitting on the end of the read, it will see two reads, + * when the core only sees one. The FIFO receives both the cancelled read + * (not delivered to the core) and the re-issued one. + * + * To solve this, interrupts are turned off before reads occur to I/O space. + * There are 3 versions of all these functions: + * - turns interrupts off around every read (higher overhead, but lower latency) + * - turns interrupts off once around the whole loop (low overhead, but longer latency) + * - DMA versions, which do not suffer from this issue. DMA versions have + * different names (prefixed by dma_), and are located in + * ../kernel/bfin_dma_5xx.c + * Using the DMA-related functions is recommended for transferring large + * buffers in/out of FIFOs. + */ + ENTRY(_insl) +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD P0 = R0; /* P0 = port */ cli R3; P1 = R1; /* P1 = address */ @@ -46,9 +67,26 @@ ENTRY(_insl) .Llong_loop_e: NOP; sti R3; RTS; +#else + P0 = R0; /* P0 = port */ + P1 = R1; /* P1 = address */ + P2 = R2; /* P2 = count */ + SSYNC; + LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; +.Llong_loop_s: + CLI R3; + NOP; NOP; NOP; + R0 = [P0]; + [P1++] = R0; +.Llong_loop_e: + STI R3; + + RTS; +#endif ENDPROC(_insl) ENTRY(_insw) +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD P0 = R0; /* P0 = port */ cli R3; P1 = R1; /* P1 = address */ @@ -61,9 +99,26 @@ ENTRY(_insw) .Lword_loop_e: NOP; sti R3; RTS; +#else + P0 = R0; /* P0 = port */ + P1 = R1; /* P1 = address */ + P2 = R2; /* P2 = count */ + SSYNC; + LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2; +.Lword_loop_s: + CLI R3; + NOP; NOP; NOP; + R0 = W[P0]; + W[P1++] = R0; +.Lword_loop_e: + STI R3; + RTS; + +#endif ENDPROC(_insw) ENTRY(_insw_8) +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD P0 = R0; /* P0 = port */ cli R3; P1 = R1; /* P1 = address */ @@ -78,9 +133,29 @@ ENTRY(_insw_8) .Lword8_loop_e: NOP; sti R3; RTS; +#else + P0 = R0; /* P0 = port */ + P1 = R1; /* P1 = address */ + P2 = R2; /* P2 = count */ + SSYNC; + LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2; 
+.Lword8_loop_s: + CLI R3; + NOP; NOP; NOP; + R0 = W[P0]; + B[P1++] = R0; + R0 = R0 >> 8; + B[P1++] = R0; + NOP; +.Lword8_loop_e: + STI R3; + + RTS; +#endif ENDPROC(_insw_8) ENTRY(_insb) +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD P0 = R0; /* P0 = port */ cli R3; P1 = R1; /* P1 = address */ @@ -93,9 +168,26 @@ ENTRY(_insb) .Lbyte_loop_e: NOP; sti R3; RTS; +#else + P0 = R0; /* P0 = port */ + P1 = R1; /* P1 = address */ + P2 = R2; /* P2 = count */ + SSYNC; + LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2; +.Lbyte_loop_s: + CLI R3; + NOP; NOP; NOP; + R0 = B[P0]; + B[P1++] = R0; +.Lbyte_loop_e: + STI R3; + + RTS; +#endif ENDPROC(_insb) ENTRY(_insl_16) +#ifdef CONFIG_BFIN_INS_LOWOVERHEAD P0 = R0; /* P0 = port */ cli R3; P1 = R1; /* P1 = address */ @@ -110,4 +202,21 @@ ENTRY(_insl_16) .Llong16_loop_e: NOP; sti R3; RTS; +#else + P0 = R0; /* P0 = port */ + P1 = R1; /* P1 = address */ + P2 = R2; /* P2 = count */ + SSYNC; + LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2; +.Llong16_loop_s: + CLI R3; + NOP; NOP; NOP; + R0 = [P0]; + W[P1++] = R0; + R0 = R0 >> 16; + W[P1++] = R0; +.Llong16_loop_e: + STI R3; + RTS; +#endif ENDPROC(_insl_16) |