diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 101 |
1 files changed, 99 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 268f7965c0..bf80a2942a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,8 +34,6 @@ #include "brw_context.h" #include "brw_wm.h" -#define SATURATE (1<<5) - /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -281,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p, brw_set_predicate_control_flag_value(p, 0xff); } +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + if (is_ddx) { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } else { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } + brw_ADD(p, dst[i], src0, negate(src1)); + } + } + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + static void emit_alu1( struct brw_compile *p, struct brw_instruction *(*func)(struct brw_compile *, struct brw_reg, @@ -908,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c, } } +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, @@ -1258,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; + case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1387,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + default: _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? |