summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_wm_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c101
1 files changed, 99 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 268f7965c0..bf80a2942a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
#include "brw_context.h"
#include "brw_wm.h"
-#define SATURATE (1<<5)
-
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -281,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p,
brw_set_predicate_control_flag_value(p, 0xff);
}
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
static void emit_alu1( struct brw_compile *p,
struct brw_instruction *(*func)(struct brw_compile *,
struct brw_reg,
@@ -908,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
@@ -1258,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1387,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
default:
_mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?