1 files changed, 106 insertions, 89 deletions
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 5f7ae6592c..a705d8b344 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -28,10 +28,13 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-               
+
+#include "util/u_math.h"
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
 
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
@@ -45,15 +48,15 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
 
 /* Payload R0:
  *
- * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
  *         corresponding to each of the 16 execution channels.
  * R0.1..8 -- ?
  * R1.0 -- triangle vertex 0.X
  * R1.1 -- triangle vertex 0.Y
- * R1.2 -- tile 0 x,y coords (2 packed uwords)
- * R1.3 -- tile 1 x,y coords (2 packed uwords)
- * R1.4 -- tile 2 x,y coords (2 packed uwords)
- * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.2 -- quad 0 x,y coords (2 packed uwords)
+ * R1.3 -- quad 1 x,y coords (2 packed uwords)
+ * R1.4 -- quad 2 x,y coords (2 packed uwords)
+ * R1.5 -- quad 3 x,y coords (2 packed uwords)
  * R1.6 -- ?
  * R1.7 -- ?
  * R1.8 -- ?
@@ -134,11 +137,17 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
    /* XXX: is this needed any more, or is this a NOOP?
     */
    if (mask & BRW_WRITEMASK_Y) {
+#if 0
       /* Y' = height - 1 - Y */
       brw_ADD(p,
 	      dst[1],
 	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 	      brw_imm_d(c->key.drawable_height - 1));
+#else
+      brw_MOV(p,  /* Y' = Y: pass the Y channel through without the height flip */
+	      dst[1],
+	      retype(arg0[1], BRW_REGISTER_TYPE_W));
+#endif
    }
 }
 
@@ -279,28 +288,28 @@ static void emit_frontfacing( struct brw_compile *p,
 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
  * looking like:
  *
- * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
  *
  * and we're trying to produce:
  *
  *           DDX                     DDY
- * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
- *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
- *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
- *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
- *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
- *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
- *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
- *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ * dst: (q0.tr - q0.tl)     (q0.tl - q0.bl)
+ *      (q0.tr - q0.tl)     (q0.tr - q0.br)
+ *      (q0.br - q0.bl)     (q0.tl - q0.bl)
+ *      (q0.br - q0.bl)     (q0.tr - q0.br)
+ *      (q1.tr - q1.tl)     (q1.tl - q1.bl)
+ *      (q1.tr - q1.tl)     (q1.tr - q1.br)
+ *      (q1.br - q1.bl)     (q1.tl - q1.bl)
+ *      (q1.br - q1.bl)     (q1.tr - q1.br)
  *
- * and add another set of two more subspans if in 16-pixel dispatch mode.
+ * and add two more quads if in 16-pixel dispatch mode.
  *
  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
  * between each other.  We could probably do it like ddx and swizzle the right
  * order later, but bail for now and just produce
- * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
  */
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
@@ -611,12 +620,12 @@ static void emit_dp3( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -633,12 +642,12 @@ static void emit_dp4( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -656,12 +665,12 @@ static void emit_dph( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -704,12 +713,12 @@ static void emit_math1( struct brw_compile *p,
 			GLuint mask,
 			const struct brw_reg *arg0 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MOV(p, brw_message_reg(2), arg0[0]);
 
@@ -732,12 +741,12 @@ static void emit_math2( struct brw_compile *p,
 			const struct brw_reg *arg0,
 			const struct brw_reg *arg1)
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_push_insn_state(p);
 
@@ -790,21 +799,32 @@ static void emit_tex( struct brw_wm_compile *c,
    GLuint i, nr;
    GLuint emit;
    GLuint msg_type;
+   GLboolean shadow = FALSE;
 
    /* How many input regs are there?
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
       emit = BRW_WRITEMASK_X;
       nr = 1;
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_SHADOW1D:
+      emit = BRW_WRITEMASK_XW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: /* RECT uses the same X,Y setup as 2D */
       emit = BRW_WRITEMASK_XY;
       nr = 2;
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      emit = BRW_WRITEMASK_XYW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       emit = BRW_WRITEMASK_XYZ;
       nr = 3;
       break;
@@ -813,11 +833,6 @@ static void emit_tex( struct brw_wm_compile *c,
       abort();
    }
 
-   if (inst->tex_shadow) {
-      nr = 4;
-      emit |= BRW_WRITEMASK_W;
-   }
-
    msgLength = 1;
 
    for (i = 0; i < nr; i++) {
@@ -832,12 +847,12 @@ static void emit_tex( struct brw_wm_compile *c,
    responseLength = 8;		/* always */
 
    if (BRW_IS_IGDNG(p->brw)) {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
    } else {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
@@ -870,20 +885,23 @@ static void emit_txb( struct brw_wm_compile *c,
    GLuint msg_type;
    /* Shadow ignored for txb.
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), arg[2]);
@@ -976,10 +994,10 @@ static void emit_kil( struct brw_wm_compile *c,
    }
 }
 
-/* KIL_NV kills the pixels that are currently executing, not based on a test
+/* KILP kills the pixels that are currently executing, not based on a test
  * of the arguments.
  */
-static void emit_kil_nv( struct brw_wm_compile *c )
+static void emit_killp( struct brw_wm_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -1259,7 +1277,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
     */
    spill_values(c, c->payload.depth, 4);
    spill_values(c, c->creg, c->nr_creg);
-   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
    
 
    for (insn = 0; insn < c->nr_insns; insn++) {
@@ -1328,89 +1346,89 @@ void brw_wm_emit( struct brw_wm_compile *c )
 
 	 /* Straightforward arithmetic:
 	  */
-      case OPCODE_ADD:
+      case TGSI_OPCODE_ADD:
 	 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_FRC:
+      case TGSI_OPCODE_FRC:
 	 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_FLR:
+      case TGSI_OPCODE_FLR:
 	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_DDX:
+      case TGSI_OPCODE_DDX:
 	 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
 	 break;
 
-      case OPCODE_DDY:
+      case TGSI_OPCODE_DDY:
 	 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
 	 break;
 
-      case OPCODE_DP3:
+      case TGSI_OPCODE_DP3:
 	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_DP4:
+      case TGSI_OPCODE_DP4:
 	 emit_dp4(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_DPH:
+      case TGSI_OPCODE_DPH:
 	 emit_dph(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_TRUNC:
+      case TGSI_OPCODE_TRUNC:
 	 emit_trunc(p, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_LRP:
+      case TGSI_OPCODE_LRP:
 	 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MAD:	
+      case TGSI_OPCODE_MAD:	
 	 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MOV:
+      case TGSI_OPCODE_MOV:
 	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_MUL:
+      case TGSI_OPCODE_MUL:
 	 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_XPD:
+      case TGSI_OPCODE_XPD:
 	 emit_xpd(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
 	 /* Higher math functions:
 	  */
-      case OPCODE_RCP:
+      case TGSI_OPCODE_RCP:
 	 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_RSQ:
+      case TGSI_OPCODE_RSQ:
 	 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_SIN:
+      case TGSI_OPCODE_SIN:
 	 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_COS:
+      case TGSI_OPCODE_COS:
 	 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_EX2:
+      case TGSI_OPCODE_EX2:
 	 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_LG2:
+      case TGSI_OPCODE_LG2:
 	 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_SCS:
+      case TGSI_OPCODE_SCS:
 	 /* There is an scs math function, but it would need some
 	  * fixup for 16-element execution.
 	  */
@@ -1420,71 +1438,70 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
 	 break;
 
-      case OPCODE_POW:
+      case TGSI_OPCODE_POW:
 	 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
 	 break;
 
 	 /* Comparisons:
 	  */
-      case OPCODE_CMP:
+      case TGSI_OPCODE_CMP:
 	 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MAX:
+      case TGSI_OPCODE_MAX:
 	 emit_max(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_MIN:
+      case TGSI_OPCODE_MIN:
 	 emit_min(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_SLT:
+      case TGSI_OPCODE_SLT:
 	 emit_slt(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_SLE:
+      case TGSI_OPCODE_SLE:
 	 emit_sle(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SGT:
+      case TGSI_OPCODE_SGT:
 	 emit_sgt(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SGE:
+      case TGSI_OPCODE_SGE:
 	 emit_sge(p, dst, dst_flags, args[0], args[1]);
 	 break;
-      case OPCODE_SEQ:
+      case TGSI_OPCODE_SEQ:
 	 emit_seq(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SNE:
+      case TGSI_OPCODE_SNE:
 	 emit_sne(p, dst, dst_flags, args[0], args[1]);
 	break;
 
-      case OPCODE_LIT:
+      case TGSI_OPCODE_LIT:
 	 emit_lit(p, dst, dst_flags, args[0]);
 	 break;
 
 	 /* Texturing operations:
 	  */
-      case OPCODE_TEX:
+      case TGSI_OPCODE_TEX:
 	 emit_tex(c, inst, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_TXB:
+      case TGSI_OPCODE_TXB:
 	 emit_txb(c, inst, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_KIL:
+      case TGSI_OPCODE_KIL:
 	 emit_kil(c, args[0]);
 	 break;
 
-      case OPCODE_KIL_NV:
-	 emit_kil_nv(c);
+      case TGSI_OPCODE_KILP:
+	 emit_killp(c);
 	 break;
 
       default:
 	 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
-		      inst->opcode, inst->opcode < MAX_OPCODE ?
-				    _mesa_opcode_string(inst->opcode) :
-				    "unknown");
+		      inst->opcode, inst->opcode < TGSI_OPCODE_LAST ?
+		      tgsi_get_opcode_info(inst->opcode)->mnemonic : "unknown"); /* no info past LAST */
       }
       
       for (i = 0; i < 4; i++)