2 files changed, 136 insertions, 0 deletions
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index cc2add7e39e..ededf9319f3 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -622,6 +622,33 @@ config CPLB_SWITCH_TAB_L1
 	  If enabled, the CPLB Switch Tables are linked
 	  into L1 data memory. (less latency)
 
+comment "Speed Optimizations"
+config BFIN_INS_LOWOVERHEAD
+	bool "ins[bwl] low overhead, higher interrupt latency"
+	default y
+	help
+	  Reads on the Blackfin are speculative. In Blackfin terms, this means
+	  they can be interrupted at any time (even after they have been issued
+	  on to the external bus), and re-issued after the interrupt occurs.
+	  For memory - this is not a big deal, since memory does not change if
+	  it sees a read.
+
+	  If a FIFO is sitting on the end of the read, it will see two reads
+	  when the core only sees one, since the FIFO receives both the read
+	  which is cancelled (and not delivered to the core) and the one which
+	  is re-issued (which is delivered to the core).
+
+	  To solve this, interrupts are turned off before reads occur to
+	  I/O space. This option controls the trade-off between the overhead
+	  and the latency of disabling interrupts during this time:
+	   "n" turns interrupts off around every single read
+		(higher overhead, but lower interrupt latency)
+	   "y" turns interrupts off once around the whole loop
+		(low overhead, but longer interrupt latency)
+
+	  The default is to leave this option on (type "Y"). If you are
+	  experiencing interrupt latency issues, it is safe to turn this off.
+
 endmenu
 
 
diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S
index eba2343b1b5..d60554dce87 100644
--- a/arch/blackfin/lib/ins.S
+++ b/arch/blackfin/lib/ins.S
@@ -33,7 +33,28 @@
 
 .align 2
 
+/*
+ * Reads on the Blackfin are speculative. In Blackfin terms, this means they
+ * can be interrupted at any time (even after they have been issued on to the
+ * external bus), and re-issued after the interrupt occurs.
+ *
+ * If a FIFO is sitting on the end of the read, it will see two reads,
+ * when the core only sees one. The FIFO receives both the read which is
+ * cancelled (not delivered to the core) and the one which is re-issued.
+ *
+ * To solve this, interrupts are turned off before reads occur to I/O space.
+ * There are 3 versions of all these functions:
+ *  - turns interrupts off around every read (higher overhead, lower latency)
+ *  - turns interrupts off around the whole loop (low overhead, longer latency)
+ *  - DMA versions, which do not suffer from this issue. DMA versions have
+ *      different names (prefixed by dma_ ), and are located in
+ *      ../kernel/bfin_dma_5xx.c
+ * Using the DMA-related functions is recommended for transferring large
+ * buffers in/out of FIFOs.
+ */
+
 ENTRY(_insl)
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
 	P0 = R0;	/* P0 = port */
 	cli R3;
 	P1 = R1;	/* P1 = address */
@@ -46,9 +67,26 @@ ENTRY(_insl)
 .Llong_loop_e: 	NOP;
 	sti R3;
 	RTS;
+#else
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+	SSYNC;
+	LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
+.Llong_loop_s:
+	CLI R3;
+	NOP; NOP; NOP;
+	R0 = [P0];
+	[P1++] = R0;
+.Llong_loop_e:
+	STI R3;
+
+	RTS;
+#endif
 ENDPROC(_insl)
 
 ENTRY(_insw)
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
 	P0 = R0;	/* P0 = port */
 	cli R3;
 	P1 = R1;	/* P1 = address */
@@ -61,9 +99,26 @@ ENTRY(_insw)
 .Lword_loop_e: 	NOP;
 	sti R3;
 	RTS;
+#else
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+	SSYNC;
+	LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
+.Lword_loop_s:
+	CLI R3;
+	NOP; NOP; NOP;
+	R0 = W[P0];
+	W[P1++] = R0;
+.Lword_loop_e:
+	STI R3;
+	RTS;
+
+#endif
 ENDPROC(_insw)
 
 ENTRY(_insw_8)
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
 	P0 = R0;	/* P0 = port */
 	cli R3;
 	P1 = R1;	/* P1 = address */
@@ -78,9 +133,29 @@ ENTRY(_insw_8)
 .Lword8_loop_e: NOP;
 	sti R3;
 	RTS;
+#else
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+	SSYNC;
+	LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
+.Lword8_loop_s:
+	CLI R3;
+	NOP; NOP; NOP;
+	R0 = W[P0];
+	B[P1++] = R0;
+	R0 = R0 >> 8;
+	B[P1++] = R0;
+	NOP;
+.Lword8_loop_e:
+	STI R3;
+
+	RTS;
+#endif
 ENDPROC(_insw_8)
 
 ENTRY(_insb)
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
 	P0 = R0;	/* P0 = port */
 	cli R3;
 	P1 = R1;	/* P1 = address */
@@ -93,9 +168,26 @@ ENTRY(_insb)
 .Lbyte_loop_e:  NOP;
 	sti R3;
 	RTS;
+#else
+	P0 = R0;        /* P0 = port */
+	P1 = R1;        /* P1 = address */
+	P2 = R2;        /* P2 = count */
+	SSYNC;
+	LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
+.Lbyte_loop_s:
+	CLI R3;
+	NOP; NOP; NOP;
+	R0 = B[P0];
+	B[P1++] = R0;
+.Lbyte_loop_e:
+	STI R3;
+
+	RTS;
+#endif
 ENDPROC(_insb)
 
 ENTRY(_insl_16)
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
 	P0 = R0;	/* P0 = port */
 	cli R3;
 	P1 = R1;	/* P1 = address */
@@ -110,4 +202,21 @@ ENTRY(_insl_16)
 .Llong16_loop_e:  NOP;
 	sti R3;
 	RTS;
+#else
+	P0 = R0;	/* P0 = port */
+	P1 = R1;	/* P1 = address */
+	P2 = R2;	/* P2 = count */
+	SSYNC;
+	LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2;
+.Llong16_loop_s:
+	CLI R3;
+	NOP; NOP; NOP;
+	R0 = [P0];
+	W[P1++] = R0;
+	R0 = R0 >> 16;
+	W[P1++] = R0;
+.Llong16_loop_e:
+	STI R3;
+	RTS;
+#endif
 ENDPROC(_insl_16)