summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Whitwell <keith@tungstengraphics.com> 2005-06-07 10:59:37 +0000
committer Keith Whitwell <keith@tungstengraphics.com> 2005-06-07 10:59:37 +0000
commit 757e0855adb1b1eb45b55e1fcf6acb47224b2853 (patch)
tree 7b9fe3f1f8ab9bc4659bc50c409d6725925a909a
parent 17d249d31b3b382af04610b2ca37d01b8f06abe4 (diff)
Simplify interpreted language:
- Expand operand argument specifiers so that all registers and parameters can be referenced directly. Remove old PAR/PRL instructions.
- No 3 operand instructions, translate MAD -> MUL, ADD. (No room for 3 full operands in instruction struct).
- Translate SWZ instructions into 1 or 2 reduced swizzles at compile time.
- Add hardwired code for moving input and output values to/from the register file. Drop old INx, OUT instructions.
-rw-r--r-- src/mesa/tnl/t_vb_arbprogram.c 1206
1 files changed, 581 insertions, 625 deletions
diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c
index 3be82c7298..5494eed098 100644
--- a/src/mesa/tnl/t_vb_arbprogram.c
+++ b/src/mesa/tnl/t_vb_arbprogram.c
@@ -49,118 +49,87 @@
/* New, internal instructions:
*/
-#define IN1 (VP_OPCODE_XPD+1)
-#define IN2 (IN1+1) /* intput-to-reg MOV */
-#define IN3 (IN1+2)
-#define IN4 (IN1+3)
-#define OUT (IN1+4) /* reg-to-output MOV */
-#define OUM (IN1+5) /* reg-to-output MOV with mask */
-#define RSW (IN1+6)
-#define MSK (IN1+7) /* reg-to-reg MOV with mask */
-#define PAR (IN1+8) /* parameter-to-reg MOV */
-#define PRL (IN1+9) /* parameter-to-reg MOV */
+#define RSW (VP_MAX_OPCODE)
+#define SEL (VP_MAX_OPCODE+1)
+#define REL (VP_MAX_OPCODE+2)
/* Layout of register file:
0 -- Scratch (Arg0)
1 -- Scratch (Arg1)
- 2 -- Scratch (Arg2)
- 3 -- Scratch (Result)
+ 2 -- Scratch (Result)
4 -- Program Temporary 0
+ 16 -- Program Temporary 11 (max for NV_VERTEX_PROGRAM)
+ 17 -- Output 0
+ 31 -- Output 14 (max for NV_VERTEX_PROGRAM) (Last writeable register)
+ 32 -- Parameter 0
..
- 31 -- Program Temporary 27
- 32 -- State/Input/Const shadow 0
- ..
- 63 -- State/Input/Const shadow 31
+ 127 -- Parameter 63 (max for NV_VERTEX_PROGRAM)
*/
-
-
-#define REG_ARG0 0
-#define REG_ARG1 1
-#define REG_ARG2 2
-#define REG_RES 3
-#define REG_TMP0 4
-#define REG_TMP_MAX 32
-#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0)
-#define REG_PAR0 32
-#define REG_PAR_MAX 64
-#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0)
-
-#define REG_MAX 64
-#define REG_SWZDST_MAX 16
+#define FILE_REG 0
+#define FILE_LOCAL_PARAM 1
+#define FILE_ENV_PARAM 2
+#define FILE_STATE_PARAM 3
+
+
+#define REG_ARG0 0
+#define REG_ARG1 1
+#define REG_ARG2 2
+#define REG_RES 3
+#define REG_ADDR 4
+#define REG_TMP0 5
+#define REG_TMP11 16
+#define REG_OUT0 17
+#define REG_OUT14 31
+#define REG_IN0 32
+#define REG_IN15 47
+#define REG_ID 48 /* 0,0,0,1 */
+#define REG_MAX 128
+#define REG_INVALID ~0
/* ARB_vp instructions are broken down into one or more of the
* following micro-instructions, each representable in a 32 bit packed
* structure.
*/
-
-union instruction {
- struct {
- GLuint opcode:6;
- GLuint dst:5;
- GLuint arg0:6;
- GLuint arg1:6;
- GLuint elt:2; /* x,y,z or w */
- GLuint pad:7;
- } scl;
+struct reg {
+ GLuint file:2;
+ GLuint idx:7;
+};
+union instruction {
struct {
GLuint opcode:6;
GLuint dst:5;
- GLuint arg0:6;
- GLuint arg1:6;
- GLuint arg2:6;
+ GLuint file0:2;
+ GLuint idx0:7;
+ GLuint file1:2;
+ GLuint idx1:7;
GLuint pad:3;
- } vec;
+ } alu;
struct {
GLuint opcode:6;
- GLuint dst:4; /* NOTE! REG 0..16 only! */
- GLuint arg0:6;
- GLuint neg:4;
- GLuint swz:12;
- } swz;
-
- struct {
- GLuint opcode:6;
- GLuint dst:6;
- GLuint arg0:6;
- GLuint neg:1; /* 1 bit only */
+ GLuint dst:5;
+ GLuint file0:2;
+ GLuint idx0:7;
+ GLuint neg:4;
GLuint swz:8; /* xyzw only */
- GLuint pad:5;
} rsw;
struct {
GLuint opcode:6;
- GLuint reg:6;
- GLuint file:5;
- GLuint idx:8; /* plenty? */
- GLuint rel:1;
- GLuint pad:6;
- } inr;
-
-
- struct {
- GLuint opcode:6;
- GLuint reg:6;
- GLuint file:5;
- GLuint idx:8; /* plenty? */
- GLuint mask:4;
- GLuint pad:3;
- } out;
-
- struct {
- GLuint opcode:6;
GLuint dst:5;
- GLuint arg0:6;
+ GLuint idx0:7; /* note! */
+ GLuint file1:2;
+ GLuint idx1:7;
GLuint mask:4;
- GLuint pad:11;
- } msk;
+ GLuint pad:1;
+ } sel;
GLuint dword;
};
@@ -168,32 +137,39 @@ union instruction {
struct compilation {
- struct {
- GLuint file:5;
- GLuint idx:8;
- } reg[REG_PAR_NR];
-
- GLuint par_active;
- GLuint par_protected;
- GLuint tmp_active;
-
+ GLuint reg_active;
union instruction *csr;
-
struct vertex_buffer *VB; /* for input sizes! */
};
+struct input {
+ GLuint idx;
+ GLfloat *data;
+ GLuint stride;
+ GLuint size;
+};
+
+struct output {
+ GLuint idx;
+ GLfloat *data;
+};
+
/*--------------------------------------------------------------------------- */
/*!
* Private storage for the vertex program pipeline stage.
*/
struct arb_vp_machine {
- GLfloat reg[REG_MAX][4]; /* Program temporaries, shadowed parameters and inputs,
- plus some internal values */
-
- GLfloat (*File[8])[4]; /* Src/Dest for PAR/PRL instructions. */
+ GLfloat reg[REG_MAX][4]; /* Program temporaries, inputs and outputs */
+ GLfloat (*File[4])[4]; /* All values reference-able from the program. */
GLint AddressReg;
+ struct input input[16];
+ GLuint nr_inputs;
+
+ struct output output[15];
+ GLuint nr_outputs;
+
union instruction store[1024];
union instruction *instructions;
GLint nr_instructions;
@@ -213,10 +189,8 @@ struct arb_vp_machine {
/*--------------------------------------------------------------------------- */
struct opcode_info {
- GLuint type;
GLuint nr_args;
const char *string;
- void (*func)( struct arb_vp_machine *, union instruction );
void (*print)( union instruction , const struct opcode_info * );
};
@@ -272,11 +246,7 @@ static GLfloat RoughApproxPow2(GLfloat t)
static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
{
-#if 0
- return (GLfloat) exp(y * log(x));
-#else
return (GLfloat) _mesa_pow(x, y);
-#endif
}
@@ -284,156 +254,50 @@ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
-
-/**
- * This is probably the least-optimal part of the process, have to
- * multiply out the stride to access each incoming input value.
- */
-static GLfloat *get_input( struct arb_vp_machine *m, GLuint index )
-{
- return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr);
-}
+#define GET_RSW(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
/**
- * Fetch a 4-element float vector from the given source register.
- * Deal with the possibility that not all elements are present.
- */
-static void do_IN1( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- const GLfloat *src = get_input(m, op.inr.idx);
-
- result[0] = src[0];
- result[1] = 0;
- result[2] = 0;
- result[3] = 1;
-}
-
-static void do_IN2( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- const GLfloat *src = get_input(m, op.inr.idx);
-
- result[0] = src[0];
- result[1] = src[1];
- result[2] = 0;
- result[3] = 1;
-}
-
-static void do_IN3( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- const GLfloat *src = get_input(m, op.inr.idx);
-
- result[0] = src[0];
- result[1] = src[1];
- result[2] = src[2];
- result[3] = 1;
-}
-
-static void do_IN4( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- const GLfloat *src = get_input(m, op.inr.idx);
-
- result[0] = src[0];
- result[1] = src[1];
- result[2] = src[2];
- result[3] = src[3];
-}
-
-/**
* Perform a reduced swizzle:
*/
static void do_RSW( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->reg[op.rsw.dst];
- const GLfloat *arg0 = m->reg[op.rsw.arg0];
+ const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
GLuint swz = op.rsw.swz;
GLuint neg = op.rsw.neg;
- GLuint i;
-
- if (neg)
- for (i = 0; i < 4; i++, swz >>= 2)
- result[i] = -arg0[swz & 0x3];
- else
- for (i = 0; i < 4; i++, swz >>= 2)
- result[i] = arg0[swz & 0x3];
-}
-
-
-
-/**
- * Store 4 floats into an external address.
- */
-static void do_OUM( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
- const GLfloat *value = m->reg[op.out.reg];
-
- if (op.out.mask & 0x1) dst[0] = value[0];
- if (op.out.mask & 0x2) dst[1] = value[1];
- if (op.out.mask & 0x4) dst[2] = value[2];
- if (op.out.mask & 0x8) dst[3] = value[3];
-}
-
-static void do_OUT( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
- const GLfloat *value = m->reg[op.out.reg];
- dst[0] = value[0];
- dst[1] = value[1];
- dst[2] = value[2];
- dst[3] = value[3];
+ result[0] = arg0[GET_RSW(swz, 0)];
+ result[1] = arg0[GET_RSW(swz, 1)];
+ result[2] = arg0[GET_RSW(swz, 2)];
+ result[3] = arg0[GET_RSW(swz, 3)];
+
+ if (neg) {
+ if (neg & 0x1) result[0] = -result[0];
+ if (neg & 0x2) result[1] = -result[1];
+ if (neg & 0x4) result[2] = -result[2];
+ if (neg & 0x8) result[3] = -result[3];
+ }
}
-/* Register-to-register MOV with writemask.
+/* Used to implement write masking
*/
-static void do_MSK( struct arb_vp_machine *m, union instruction op )
+static void do_SEL( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *dst = m->reg[op.msk.dst];
- const GLfloat *arg0 = m->reg[op.msk.arg0];
+ GLfloat *dst = m->reg[op.sel.dst];
+ const GLfloat *arg0 = m->reg[op.sel.idx0];
+ const GLfloat *arg1 = m->File[op.sel.file1][op.sel.idx1];
- if (op.msk.mask & 0x1) dst[0] = arg0[0];
- if (op.msk.mask & 0x2) dst[1] = arg0[1];
- if (op.msk.mask & 0x4) dst[2] = arg0[2];
- if (op.msk.mask & 0x8) dst[3] = arg0[3];
-}
-
-
-/* Retreive parameters and other constant values:
- */
-static void do_PAR( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- const GLfloat *src = m->File[op.inr.file][op.inr.idx];
-
- result[0] = src[0];
- result[1] = src[1];
- result[2] = src[2];
- result[3] = src[3];
+ dst[0] = (op.sel.mask & 0x1) ? arg0[0] : arg1[0];
+ dst[1] = (op.sel.mask & 0x2) ? arg0[1] : arg1[1];
+ dst[2] = (op.sel.mask & 0x4) ? arg0[2] : arg1[2];
+ dst[3] = (op.sel.mask & 0x8) ? arg0[3] : arg1[3];
}
-#define RELADDR_MASK (MAX_NV_VERTEX_PROGRAM_PARAMS-1)
-
-static void do_PRL( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.inr.reg];
- GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK;
- const GLfloat *src = m->File[op.inr.file][index];
-
- result[0] = src[0];
- result[1] = src[1];
- result[2] = src[2];
- result[3] = src[3];
-}
-
static void do_PRT( struct arb_vp_machine *m, union instruction op )
{
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
_mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
arg0[0], arg0[1], arg0[2], arg0[3]);
@@ -447,8 +311,8 @@ static void do_PRT( struct arb_vp_machine *m, union instruction op )
static void do_ABS( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
@@ -458,9 +322,9 @@ static void do_ABS( struct arb_vp_machine *m, union instruction op )
static void do_ADD( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] + arg1[0];
result[1] = arg0[1] + arg1[1];
@@ -471,16 +335,16 @@ static void do_ADD( struct arb_vp_machine *m, union instruction op )
static void do_ARL( struct arb_vp_machine *m, union instruction op )
{
- const GLfloat *arg0 = m->reg[op.out.reg];
- m->AddressReg = (GLint) floor(arg0[0]);
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ m->reg[REG_ADDR][0] = FLOORF(arg0[0]);
}
static void do_DP3( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
- const GLfloat *arg1 = m->reg[op.scl.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] * arg1[0] +
arg0[1] * arg1[1] +
@@ -489,26 +353,13 @@ static void do_DP3( struct arb_vp_machine *m, union instruction op )
PUFF(result);
}
-#if 0
-static void do_MAT4( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
- const GLfloat *mat[] = m->reg[op.scl.arg1];
-
- result[0] = (arg0[0] * mat0[0] + arg0[1] * mat0[1] + arg0[2] * mat0[2] + arg0[3] * mat0[3]);
- result[1] = (arg0[0] * mat1[0] + arg0[1] * mat1[1] + arg0[2] * mat1[2] + arg0[3] * mat1[3]);
- result[2] = (arg0[0] * mat2[0] + arg0[1] * mat2[1] + arg0[2] * mat2[2] + arg0[3] * mat2[3]);
- result[3] = (arg0[0] * mat3[0] + arg0[1] * mat3[1] + arg0[2] * mat3[2] + arg0[3] * mat3[3]);
-}
-#endif
static void do_DP4( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
- const GLfloat *arg1 = m->reg[op.scl.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] * arg1[0] +
arg0[1] * arg1[1] +
@@ -520,9 +371,9 @@ static void do_DP4( struct arb_vp_machine *m, union instruction op )
static void do_DPH( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
- const GLfloat *arg1 = m->reg[op.scl.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] * arg1[0] +
arg0[1] * arg1[1] +
@@ -534,9 +385,9 @@ static void do_DPH( struct arb_vp_machine *m, union instruction op )
static void do_DST( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = 1.0F;
result[1] = arg0[1] * arg1[1];
@@ -547,8 +398,8 @@ static void do_DST( struct arb_vp_machine *m, union instruction op )
static void do_EX2( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
PUFF(result);
@@ -556,8 +407,8 @@ static void do_EX2( struct arb_vp_machine *m, union instruction op )
static void do_EXP( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
GLfloat tmp = arg0[0];
GLfloat flr_tmp = FLOORF(tmp);
@@ -572,8 +423,8 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
static void do_FLR( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = FLOORF(arg0[0]);
result[1] = FLOORF(arg0[1]);
@@ -583,8 +434,8 @@ static void do_FLR( struct arb_vp_machine *m, union instruction op )
static void do_FRC( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = arg0[0] - FLOORF(arg0[0]);
result[1] = arg0[1] - FLOORF(arg0[1]);
@@ -594,8 +445,8 @@ static void do_FRC( struct arb_vp_machine *m, union instruction op )
static void do_LG2( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = RoughApproxLog2(arg0[0]);
PUFF(result);
@@ -605,8 +456,8 @@ static void do_LG2( struct arb_vp_machine *m, union instruction op )
static void do_LIT( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
GLfloat tmp[4];
@@ -624,8 +475,8 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
static void do_LOG( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
GLfloat tmp = FABSF(arg0[0]);
int exponent;
GLfloat mantissa = FREXPF(tmp, &exponent);
@@ -636,25 +487,11 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op )
result[3] = 1.0;
}
-
-static void do_MAD( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
- const GLfloat *arg2 = m->reg[op.vec.arg2];
-
- result[0] = arg0[0] * arg1[0] + arg2[0];
- result[1] = arg0[1] * arg1[1] + arg2[1];
- result[2] = arg0[2] * arg1[2] + arg2[2];
- result[3] = arg0[3] * arg1[3] + arg2[3];
-}
-
static void do_MAX( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
@@ -665,9 +502,9 @@ static void do_MAX( struct arb_vp_machine *m, union instruction op )
static void do_MIN( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
@@ -677,8 +514,8 @@ static void do_MIN( struct arb_vp_machine *m, union instruction op )
static void do_MOV( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = arg0[0];
result[1] = arg0[1];
@@ -688,9 +525,9 @@ static void do_MOV( struct arb_vp_machine *m, union instruction op )
static void do_MUL( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] * arg1[0];
result[1] = arg0[1] * arg1[1];
@@ -701,18 +538,30 @@ static void do_MUL( struct arb_vp_machine *m, union instruction op )
static void do_POW( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
- const GLfloat *arg1 = m->reg[op.scl.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
PUFF(result);
}
+static void do_REL( struct arb_vp_machine *m, union instruction op )
+{
+ GLfloat *result = m->reg[op.alu.dst];
+ GLuint idx = (op.alu.idx0 + (GLint)m->reg[REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
+ const GLfloat *arg0 = m->File[op.alu.file0][idx];
+
+ result[0] = arg0[0];
+ result[1] = arg0[1];
+ result[2] = arg0[2];
+ result[3] = arg0[3];
+}
+
static void do_RCP( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = 1.0F / arg0[0];
PUFF(result);
@@ -720,8 +569,8 @@ static void do_RCP( struct arb_vp_machine *m, union instruction op )
static void do_RSQ( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.scl.dst];
- const GLfloat *arg0 = m->reg[op.scl.arg0];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = INV_SQRTF(FABSF(arg0[0]));
PUFF(result);
@@ -730,9 +579,9 @@ static void do_RSQ( struct arb_vp_machine *m, union instruction op )
static void do_SGE( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
@@ -743,9 +592,9 @@ static void do_SGE( struct arb_vp_machine *m, union instruction op )
static void do_SLT( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
@@ -753,29 +602,11 @@ static void do_SLT( struct arb_vp_machine *m, union instruction op )
result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
}
-static void do_SWZ( struct arb_vp_machine *m, union instruction op )
-{
- GLfloat *result = m->reg[op.swz.dst];
- const GLfloat *arg0 = m->reg[op.swz.arg0];
- GLuint swz = op.swz.swz;
- GLuint neg = op.swz.neg;
- GLuint i;
-
- for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
- switch (swz & 0x7) {
- case SWIZZLE_ZERO: result[i] = 0.0; break;
- case SWIZZLE_ONE: result[i] = 1.0; break;
- default: result[i] = arg0[swz & 0x7]; break;
- }
- if (neg & 0x1) result[i] = -result[i];
- }
-}
-
static void do_SUB( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] - arg1[0];
result[1] = arg0[1] - arg1[1];
@@ -786,9 +617,9 @@ static void do_SUB( struct arb_vp_machine *m, union instruction op )
static void do_XPD( struct arb_vp_machine *m, union instruction op )
{
- GLfloat *result = m->reg[op.vec.dst];
- const GLfloat *arg0 = m->reg[op.vec.arg0];
- const GLfloat *arg1 = m->reg[op.vec.arg1];
+ GLfloat *result = m->reg[op.alu.dst];
+ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
+ const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
@@ -801,20 +632,6 @@ static void do_NOP( struct arb_vp_machine *m, union instruction op )
/* Some useful debugging functions:
*/
-static void print_reg( GLuint reg )
-{
- if (reg == REG_RES)
- _mesa_printf("RES");
- else if (reg >= REG_ARG0 && reg <= REG_ARG2)
- _mesa_printf("ARG%d", reg - REG_ARG0);
- else if (reg >= REG_TMP0 && reg < REG_TMP_MAX)
- _mesa_printf("TMP%d", reg - REG_TMP0);
- else if (reg >= REG_PAR0 && reg < REG_PAR_MAX)
- _mesa_printf("PAR%d", reg - REG_PAR0);
- else
- _mesa_printf("???");
-}
-
static void print_mask( GLuint mask )
{
_mesa_printf(".");
@@ -824,45 +641,38 @@ static void print_mask( GLuint mask )
if (mask&0x8) _mesa_printf("w");
}
-static void print_extern( GLuint file, GLuint idx )
+static void print_reg( GLuint file, GLuint reg )
{
static const char *reg_file[] = {
- "TEMPORARY",
- "INPUT",
- "OUTPUT",
+ "REG",
"LOCAL_PARAM",
"ENV_PARAM",
- "NAMED_PARAM",
"STATE_VAR",
- "WRITE_ONLY",
- "ADDRESS"
};
- _mesa_printf("%s:%d", reg_file[file], idx);
-}
-
-
-
-static void print_SWZ( union instruction op, const struct opcode_info *info )
-{
- GLuint swz = op.swz.swz;
- GLuint neg = op.swz.neg;
- GLuint i;
-
- _mesa_printf("%s ", info->string);
- print_reg(op.swz.dst);
- _mesa_printf(", ");
- print_reg(op.swz.arg0);
- _mesa_printf(".");
- for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
- const char *cswz = "xyzw01??";
- if (neg & 0x1)
- _mesa_printf("-");
- _mesa_printf("%c", cswz[swz&0x7]);
+ if (file == 0) {
+ if (reg == REG_RES)
+ _mesa_printf("RES");
+ else if (reg >= REG_ARG0 && reg <= REG_ARG1)
+ _mesa_printf("ARG%d", reg - REG_ARG0);
+ else if (reg >= REG_TMP0 && reg <= REG_TMP11)
+ _mesa_printf("TMP%d", reg - REG_TMP0);
+ else if (reg >= REG_IN0 && reg <= REG_IN15)
+ _mesa_printf("IN%d", reg - REG_IN0);
+ else if (reg >= REG_OUT0 && reg <= REG_OUT14)
+ _mesa_printf("OUT%d", reg - REG_OUT0);
+ else if (reg == REG_ADDR)
+ _mesa_printf("ADDR");
+ else if (reg == REG_ID)
+ _mesa_printf("ID");
+ else
+ _mesa_printf("REG%d", reg);
}
- _mesa_printf("\n");
+ else
+ _mesa_printf("%s:%d", reg_file[file], reg);
}
+
static void print_RSW( union instruction op, const struct opcode_info *info )
{
GLuint swz = op.rsw.swz;
@@ -870,13 +680,13 @@ static void print_RSW( union instruction op, const struct opcode_info *info )
GLuint i;
_mesa_printf("%s ", info->string);
- print_reg(op.rsw.dst);
+ print_reg(0, op.rsw.dst);
_mesa_printf(", ");
- print_reg(op.rsw.arg0);
+ print_reg(op.rsw.file0, op.rsw.idx0);
_mesa_printf(".");
for (i = 0; i < 4; i++, swz >>= 2) {
const char *cswz = "xyzw";
- if (neg)
+ if (neg & (1<<i))
_mesa_printf("-");
_mesa_printf("%c", cswz[swz&0x3]);
}
@@ -884,193 +694,203 @@ static void print_RSW( union instruction op, const struct opcode_info *info )
}
-static void print_SCL( union instruction op, const struct opcode_info *info )
-{
- _mesa_printf("%s ", info->string);
- print_reg(op.scl.dst);
- _mesa_printf(", ");
- print_reg(op.scl.arg0);
- if (info->nr_args > 1) {
- _mesa_printf(", ");
- print_reg(op.scl.arg1);
- }
- _mesa_printf("\n");
-}
-
-
-static void print_VEC( union instruction op, const struct opcode_info *info )
+static void print_ALU( union instruction op, const struct opcode_info *info )
{
_mesa_printf("%s ", info->string);
- print_reg(op.vec.dst);
+ print_reg(0, op.alu.dst);
_mesa_printf(", ");
- print_reg(op.vec.arg0);
+ print_reg(op.alu.file0, op.alu.idx0);
if (info->nr_args > 1) {
_mesa_printf(", ");
- print_reg(op.vec.arg1);
- }
- if (info->nr_args > 2) {
- _mesa_printf(", ");
- print_reg(op.vec.arg2);
+ print_reg(op.alu.file1, op.alu.idx1);
}
_mesa_printf("\n");
}
-static void print_MSK( union instruction op, const struct opcode_info *info )
+static void print_SEL( union instruction op, const struct opcode_info *info )
{
_mesa_printf("%s ", info->string);
- print_reg(op.msk.dst);
- print_mask(op.msk.mask);
+ print_reg(0, op.sel.dst);
_mesa_printf(", ");
- print_reg(op.msk.arg0);
- _mesa_printf("\n");
-}
-
-static void print_IN( union instruction op, const struct opcode_info *info )
-{
- _mesa_printf("%s ", info->string);
- print_reg(op.inr.reg);
+ print_reg(0, op.sel.idx0);
+ print_mask(op.sel.mask);
_mesa_printf(", ");
- print_extern(op.inr.file, op.inr.idx);
+ print_reg(op.sel.file1, op.sel.idx1);
+ print_mask(~op.sel.mask);
_mesa_printf("\n");
}
-static void print_OUT( union instruction op, const struct opcode_info *info )
-{
- _mesa_printf("%s ", info->string);
- print_extern(op.out.file, op.out.idx);
- if (op.out.opcode == OUM)
- print_mask(op.out.mask);
- _mesa_printf(", ");
- print_reg(op.out.reg);
- _mesa_printf("\n");
-}
static void print_NOP( union instruction op, const struct opcode_info *info )
{
}
#define NOP 0
-#define VEC 1
-#define SCL 2
-#define SWZ 3
+#define ALU 1
+#define SWZ 2
+/* Per-opcode description table, indexed directly by opcode value
+ * (see the opcode_info[inst->Opcode] lookups in this file).  Each row
+ * is { number of source arguments, mnemonic, disassembly routine }.
+ *
+ * The rows must stay in the same order as opcode_func[], which is
+ * indexed by the same opcode values.  The last three rows describe the
+ * internal opcodes RSW, SEL and REL, which are defined relative to
+ * VP_MAX_OPCODE.
+ */
static const struct opcode_info opcode_info[] =
{
- { VEC, 1, "ABS", do_ABS, print_VEC },
- { VEC, 2, "ADD", do_ADD, print_VEC },
- { OUT, 1, "ARL", do_ARL, print_OUT },
- { SCL, 2, "DP3", do_DP3, print_SCL },
- { SCL, 2, "DP4", do_DP4, print_SCL },
- { SCL, 2, "DPH", do_DPH, print_SCL },
- { VEC, 2, "DST", do_DST, print_VEC },
- { NOP, 0, "END", do_NOP, print_NOP },
- { SCL, 1, "EX2", do_EX2, print_VEC },
- { VEC, 1, "EXP", do_EXP, print_VEC },
- { VEC, 1, "FLR", do_FLR, print_VEC },
- { VEC, 1, "FRC", do_FRC, print_VEC },
- { SCL, 1, "LG2", do_LG2, print_VEC },
- { VEC, 1, "LIT", do_LIT, print_VEC },
- { VEC, 1, "LOG", do_LOG, print_VEC },
- { VEC, 3, "MAD", do_MAD, print_VEC },
- { VEC, 2, "MAX", do_MAX, print_VEC },
- { VEC, 2, "MIN", do_MIN, print_VEC },
- { VEC, 1, "MOV", do_MOV, print_VEC },
- { VEC, 2, "MUL", do_MUL, print_VEC },
- { SCL, 2, "POW", do_POW, print_VEC },
- { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */
- { NOP, 1, "RCC", do_NOP, print_NOP },
- { SCL, 1, "RCP", do_RCP, print_VEC },
- { SCL, 1, "RSQ", do_RSQ, print_VEC },
- { VEC, 2, "SGE", do_SGE, print_VEC },
- { VEC, 2, "SLT", do_SLT, print_VEC },
- { VEC, 2, "SUB", do_SUB, print_VEC },
- { SWZ, 1, "SWZ", do_SWZ, print_SWZ },
- { VEC, 2, "XPD", do_XPD, print_VEC },
- { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */
- { IN4, 1, "IN2", do_IN2, print_IN },
- { IN4, 1, "IN3", do_IN3, print_IN },
- { IN4, 1, "IN4", do_IN4, print_IN },
- { OUT, 1, "OUT", do_OUT, print_OUT },
- { OUT, 1, "OUM", do_OUM, print_OUT },
- { SWZ, 1, "RSW", do_RSW, print_RSW },
- { MSK, 1, "MSK", do_MSK, print_MSK },
- { IN4, 1, "PAR", do_PAR, print_IN },
- { IN4, 1, "PRL", do_PRL, print_IN },
+ { 1, "ABS", print_ALU },
+ { 2, "ADD", print_ALU },
+ { 1, "ARL", print_ALU },
+ { 2, "DP3", print_ALU },
+ { 2, "DP4", print_ALU },
+ { 2, "DPH", print_ALU },
+ { 2, "DST", print_ALU },
+ { 0, "END", print_NOP },
+ { 1, "EX2", print_ALU },
+ { 1, "EXP", print_ALU },
+ { 1, "FLR", print_ALU },
+ { 1, "FRC", print_ALU },
+ { 1, "LG2", print_ALU },
+ { 1, "LIT", print_ALU },
+ { 1, "LOG", print_ALU },
+ { 3, "MAD", print_NOP }, /* placeholder: cvp_emit_inst splits MAD into MUL + ADD */
+ { 2, "MAX", print_ALU },
+ { 2, "MIN", print_ALU },
+ { 1, "MOV", print_ALU },
+ { 2, "MUL", print_ALU },
+ { 2, "POW", print_ALU },
+ { 1, "PRT", print_ALU }, /* PRINT */
+ { 1, "RCC", print_NOP },
+ { 1, "RCP", print_ALU },
+ { 1, "RSQ", print_ALU },
+ { 2, "SGE", print_ALU },
+ { 2, "SLT", print_ALU },
+ { 2, "SUB", print_ALU },
+ { 1, "SWZ", print_NOP }, /* placeholder: cvp_emit_inst lowers SWZ to RSW/MOV/SEL */
+ { 2, "XPD", print_ALU },
+ { 1, "RSW", print_RSW }, /* internal opcodes from here on */
+ { 2, "SEL", print_SEL },
+ { 1, "REL", print_ALU },
};
-static GLuint cvp_load_reg( struct compilation *cp,
- GLuint file,
- GLuint index,
- GLuint rel )
+/* Execution dispatch table, indexed by instruction opcode: the
+ * interpreter loop calls opcode_func[inst.alu.opcode]( m, inst ).
+ * Entries are positional and must stay in the same order as the rows
+ * of opcode_info[].
+ */
+static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) =
+{
+ do_ABS,
+ do_ADD,
+ do_ARL,
+ do_DP3,
+ do_DP4,
+ do_DPH,
+ do_DST,
+ do_NOP, /* END slot */
+ do_EX2,
+ do_EXP,
+ do_FLR,
+ do_FRC,
+ do_LG2,
+ do_LIT,
+ do_LOG,
+ do_NOP, /* MAD slot: cvp_emit_inst emits MUL + ADD instead */
+ do_MAX,
+ do_MIN,
+ do_MOV,
+ do_MUL,
+ do_POW,
+ do_PRT,
+ do_NOP, /* RCC slot */
+ do_RCP,
+ do_RSQ,
+ do_SGE,
+ do_SLT,
+ do_SUB,
+ do_RSW, /* SWZ slot: shares the RSW handler */
+ do_XPD,
+ do_RSW,
+ do_SEL,
+ do_REL,
+};
+
+/* Hand out the next free slot in the instruction stream being built.
+ *
+ * Returns the slot cp->csr currently points at, advances cp->csr past
+ * it and clears the slot's dword so callers only need to set the
+ * fields they care about.  No bounds check is performed here.
+ */
+static union instruction *cvp_next_instruction( struct compilation *cp )
{
- GLuint i, op;
+ union instruction *op = cp->csr++;
+ op->dword = 0;
+ return op;
+}
- if (file == PROGRAM_TEMPORARY)
- return index + REG_TMP0;
+/* Build a struct reg value from a register file and an index within
+ * that file.  Pure helper: no state is read or modified.
+ */
+static struct reg cvp_make_reg( GLuint file, GLuint idx )
+{
+ struct reg reg;
+ reg.file = file;
+ reg.idx = idx;
+ return reg;
+}
- /* Don't try to cache relatively addressed values yet:
- */
- if (!rel) {
- for (i = 0; i < REG_PAR_NR; i++) {
- if ((cp->par_active & (1<<i)) &&
- cp->reg[i].file == file &&
- cp->reg[i].idx == index) {
- cp->par_protected |= (1<<i);
- return i + REG_PAR0;
- }
- }
- }
+/* Append a REL instruction whose source is 'reg' and whose destination
+ * is the register index of 'tmpreg'.
+ *
+ * Returns tmpreg, so the caller can use the destination as the operand
+ * in place of the original parameter reference.  Only tmpreg.idx is
+ * encoded; tmpreg.file is not written into the instruction.
+ * NOTE(review): presumably do_REL applies the address register when
+ * fetching 'reg' - confirm against its implementation.
+ */
+static struct reg cvp_emit_rel( struct compilation *cp,
+ struct reg reg,
+ struct reg tmpreg )
+{
+ union instruction *op = cvp_next_instruction(cp);
+ op->alu.opcode = REL;
+ op->alu.file0 = reg.file;
+ op->alu.idx0 = reg.idx;
+ op->alu.dst = tmpreg.idx;
+ return tmpreg;
+}
- /* Not already loaded, so identify a slot and load it.
- * TODO: preload these values once only!
- * TODO: better eviction strategy!
- */
- if (cp->par_active == ~0) {
- assert(cp->par_protected != ~0);
- cp->par_active = cp->par_protected;
- }
- i = ffs(~cp->par_active);
- assert(i);
- i--;
+/* Translate a parser-level (file, index) register reference into the
+ * internal struct reg used by the compiled instructions.
+ *
+ * cp     - compilation state; only touched when a REL instruction is emitted
+ * file   - PROGRAM_* register file of the reference
+ * index  - index within that file
+ * rel    - non-zero when the reference is relatively addressed
+ * tmpidx - FILE_REG index to use as the destination of an emitted REL
+ *
+ * Temporaries, inputs and outputs map to fixed offsets inside FILE_REG
+ * and never emit code ('rel' is ignored for them).  Local, environment
+ * and state parameters are returned as direct references into their
+ * own file, unless 'rel' is set, in which case a REL instruction is
+ * emitted and the FILE_REG register 'tmpidx' is returned instead.
+ * Any other file trips the assert and returns the scratch register.
+ */
+static struct reg cvp_load_reg( struct compilation *cp,
+ GLuint file,
+ GLuint index,
+ GLuint rel,
+ GLuint tmpidx )
+{
+ struct reg tmpreg = cvp_make_reg(FILE_REG, tmpidx);
+ struct reg reg;
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return cvp_make_reg(FILE_REG, REG_TMP0 + index);
- if (file == PROGRAM_INPUT)
- op = IN1 + cp->VB->AttribPtr[index]->size - 1;
- else if (rel)
- op = PRL;
- else
- op = PAR;
-
- cp->csr->dword = 0;
- cp->csr->inr.opcode = op;
- cp->csr->inr.reg = i + REG_PAR0;
- cp->csr->inr.file = file;
- cp->csr->inr.idx = index;
- cp->csr++;
-
- cp->reg[i].file = file;
- cp->reg[i].idx = index;
- cp->par_protected |= (1<<i);
- cp->par_active |= (1<<i);
- return i + REG_PAR0;
-}
-
-static void cvp_release_regs( struct compilation *cp )
-{
- cp->par_protected = 0;
-}
+ case PROGRAM_INPUT:
+ return cvp_make_reg(FILE_REG, REG_IN0 + index);
+ case PROGRAM_OUTPUT:
+ return cvp_make_reg(FILE_REG, REG_OUT0 + index);
+ /* These two aren't populated by the parser?
+ */
+ case PROGRAM_LOCAL_PARAM:
+ reg = cvp_make_reg(FILE_LOCAL_PARAM, index);
+ if (rel)
+ return cvp_emit_rel(cp, reg, tmpreg);
+ else
+ return reg;
+
+ case PROGRAM_ENV_PARAM:
+ reg = cvp_make_reg(FILE_ENV_PARAM, index);
+ if (rel)
+ return cvp_emit_rel(cp, reg, tmpreg);
+ else
+ return reg;
+
+ case PROGRAM_STATE_VAR:
+ reg = cvp_make_reg(FILE_STATE_PARAM, index);
+ if (rel)
+ return cvp_emit_rel(cp, reg, tmpreg);
+ else
+ return reg;
+
+ /* Invalid values:
+ */
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ default:
+ assert(0);
+ return tmpreg; /* can't happen */
+ }
+}
-static GLuint cvp_emit_arg( struct compilation *cp,
- const struct vp_src_register *src,
- GLuint arg )
+static struct reg cvp_emit_arg( struct compilation *cp,
+ const struct vp_src_register *src,
+ GLuint arg )
{
- GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr );
+ struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg );
union instruction rsw, noop;
-
+
/* Emit any necessary swizzling.
*/
rsw.dword = 0;
@@ -1088,12 +908,13 @@ static GLuint cvp_emit_arg( struct compilation *cp,
(3<<6));
if (rsw.dword != noop.dword) {
- GLuint rsw_reg = arg;
- cp->csr->dword = rsw.dword;
- cp->csr->rsw.opcode = RSW;
- cp->csr->rsw.arg0 = reg;
- cp->csr->rsw.dst = rsw_reg;
- cp->csr++;
+ union instruction *op = cvp_next_instruction(cp);
+ struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg);
+ op->dword = rsw.dword;
+ op->rsw.opcode = RSW;
+ op->rsw.file0 = reg.file;
+ op->rsw.idx0 = reg.idx;
+ op->rsw.dst = rsw_reg.idx;
return rsw_reg;
}
else
@@ -1102,48 +923,82 @@ static GLuint cvp_emit_arg( struct compilation *cp,
static GLuint cvp_choose_result( struct compilation *cp,
const struct vp_dst_register *dst,
- union instruction *fixup,
- GLuint maxreg)
+ union instruction *fixup )
{
GLuint mask = dst->WriteMask;
+ GLuint idx;
- if (dst->File == PROGRAM_TEMPORARY) {
-
- /* Optimization: When writing (with a writemask) to an undefined
- * value for the first time, the writemask may be ignored. In
- * practise this means that the MSK instruction to implement the
- * writemask can be dropped.
+ switch (dst->File) {
+ case PROGRAM_TEMPORARY:
+ idx = REG_TMP0 + dst->Index;
+ break;
+ case PROGRAM_OUTPUT:
+ idx = REG_OUT0 + dst->Index;
+ break;
+ default:
+ assert(0);
+ return REG_RES; /* can't happen */
+ }
+
+ /* Optimization: When writing (with a writemask) to an undefined
+ * value for the first time, the writemask may be ignored.
+ */
+ if (mask != WRITEMASK_XYZW && (cp->reg_active & (1 << idx))) {
+ fixup->sel.opcode = SEL;
+ fixup->sel.idx0 = REG_RES;
+ fixup->sel.file1 = FILE_REG;
+ fixup->sel.idx1 = idx;
+ fixup->sel.dst = idx;
+ fixup->sel.mask = mask;
+ cp->reg_active |= 1 << idx;
+ return REG_RES;
+ }
+ else {
+ fixup->dword = 0;
+ cp->reg_active |= 1 << idx;
+ return idx;
+ }
+}
+
+#define RSW_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+/* Emit a reduced-swizzle (RSW) instruction when one is needed.
+ *
+ * cp    - compilation state
+ * dst   - FILE_REG index that receives the swizzled value
+ * src   - source register
+ * neg   - per-channel negate bits
+ * swz   - packed swizzle, two bits per channel
+ * force - when true, 'dst' must end up holding the value even if the
+ *         swizzle is the identity and nothing is negated
+ *
+ * Returns the register that now holds the value: (FILE_REG, dst) when
+ * an RSW or a forced MOV was emitted, otherwise 'src' unchanged with no
+ * instruction emitted.
+ */
+static struct reg cvp_emit_rsw( struct compilation *cp,
+ GLuint dst,
+ struct reg src,
+ GLuint neg,
+ GLuint swz,
+ GLboolean force)
+{
+ struct reg retval;
+
+ if (swz != RSW_NOOP || neg != 0) {
+ union instruction *op = cvp_next_instruction(cp);
+ op->rsw.opcode = RSW;
+ op->rsw.dst = dst;
+ op->rsw.file0 = src.file;
+ op->rsw.idx0 = src.idx;
+ op->rsw.neg = neg;
+ op->rsw.swz = swz;
+
+ retval.file = FILE_REG;
+ retval.idx = dst;
+ return retval;
+ }
+ else if (force) {
+ /* Oops. Degenerate case:
*/
- if (dst->Index < maxreg &&
- (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) {
- fixup->dword = 0;
- cp->tmp_active |= (1<<dst->Index);
- return REG_TMP0 + dst->Index;
- }
- else if (mask != 0xf) {
- fixup->msk.opcode = MSK;
- fixup->msk.arg0 = REG_RES;
- fixup->msk.dst = REG_TMP0 + dst->Index;
- fixup->msk.mask = mask;
- cp->tmp_active |= (1<<dst->Index);
- return REG_RES;
- }
- else {
- fixup->vec.opcode = VP_OPCODE_MOV;
- fixup->vec.arg0 = REG_RES;
- fixup->vec.dst = REG_TMP0 + dst->Index;
- cp->tmp_active |= (1<<dst->Index);
- return REG_RES;
- }
+ /* Identity swizzle but the caller needs the value in 'dst': a plain MOV does it. */
+ union instruction *op = cvp_next_instruction(cp);
+ op->alu.opcode = VP_OPCODE_MOV;
+ op->alu.dst = dst;
+ op->alu.file0 = src.file;
+ op->alu.idx0 = src.idx;
+
+ retval.file = FILE_REG;
+ retval.idx = dst;
+ return retval;
}
else {
- assert(dst->File == PROGRAM_OUTPUT);
- fixup->out.opcode = (mask == 0xf) ? OUT : OUM;
- fixup->out.reg = REG_RES;
- fixup->out.file = dst->File;
- fixup->out.idx = dst->Index;
- fixup->out.mask = mask;
- return REG_RES;
+ return src;
}
}
@@ -1152,76 +1007,129 @@ static void cvp_emit_inst( struct compilation *cp,
const struct vp_instruction *inst )
{
+ /* Translate one parsed vertex-program instruction into one or more
+ * internal instructions appended to cp's instruction stream.
+ *
+ * MAD is split into MUL + ADD, ARL writes REG_ADDR, SWZ is lowered
+ * to reduced swizzles (plus a SEL when both the source and the
+ * {0,0,0,1} constant contribute), PRINT/END emit nothing, and every
+ * other opcode becomes a single ALU instruction.  Whenever
+ * cvp_choose_result() redirects the result to REG_RES, the fixup
+ * instruction it filled in is emitted afterwards to apply the
+ * destination write mask.
+ */
const struct opcode_info *info = &opcode_info[inst->Opcode];
+ union instruction *op;
union instruction fixup;
- GLuint reg[3];
+ /* Zero-initialised: the default case copies reg[1] into the
+ * instruction even for one-operand opcodes.
+ */
+ struct reg reg[3] = { { 0 } };
GLuint result, i;
+
+ /* Never copy an indeterminate fixup into the instruction stream. */
+ fixup.dword = 0;
/* Need to handle SWZ, ARL specially.
*/
- switch (info->type) {
- case OUT:
- assert(inst->Opcode == VP_OPCODE_ARL);
- reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
-
- cp->csr->dword = 0;
- cp->csr->out.opcode = inst->Opcode;
- cp->csr->out.reg = reg[0];
- cp->csr->out.file = PROGRAM_ADDRESS;
- cp->csr->out.idx = 0;
+ switch (inst->Opcode) {
+ /* Split into mul and add:
+ */
+ case VP_OPCODE_MAD:
+ result = cvp_choose_result( cp, &inst->DstReg, &fixup );
+ for (i = 0; i < 3; i++)
+ reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i );
+
+ op = cvp_next_instruction(cp);
+ op->alu.opcode = VP_OPCODE_MUL;
+ op->alu.file0 = reg[0].file;
+ op->alu.idx0 = reg[0].idx;
+ op->alu.file1 = reg[1].file;
+ op->alu.idx1 = reg[1].idx;
+ op->alu.dst = REG_ARG0;
+
+ op = cvp_next_instruction(cp);
+ op->alu.opcode = VP_OPCODE_ADD;
+ op->alu.file0 = FILE_REG;
+ op->alu.idx0 = REG_ARG0;
+ op->alu.file1 = reg[2].file;
+ op->alu.idx1 = reg[2].idx;
+ op->alu.dst = result;
+
+ /* A masked write lands in REG_RES; without the fixup the result
+ * would never reach the real destination.
+ */
+ if (result == REG_RES) {
+ op = cvp_next_instruction(cp);
+ op->dword = fixup.dword;
+ }
break;
- case SWZ:
- assert(inst->Opcode == VP_OPCODE_SWZ);
- result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX );
+ case VP_OPCODE_ARL:
reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
- cp->csr->dword = 0;
- cp->csr->swz.opcode = VP_OPCODE_SWZ;
- cp->csr->swz.arg0 = reg[0];
- cp->csr->swz.dst = result;
- cp->csr->swz.neg = inst->SrcReg[0].Negate;
- cp->csr->swz.swz = inst->SrcReg[0].Swizzle;
- cp->csr++;
-
- if (result == REG_RES) {
- cp->csr->dword = fixup.dword;
- cp->csr++;
- }
+ op = cvp_next_instruction(cp);
+ op->alu.opcode = inst->Opcode;
+ op->alu.dst = REG_ADDR;
+ op->alu.file0 = reg[0].file;
+ op->alu.idx0 = reg[0].idx;
break;
- case VEC:
- case SCL: /* for now */
- result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX );
+ case VP_OPCODE_SWZ: {
+ /* These are accumulated with |= below, so they must start at zero. */
+ GLuint swz0 = 0, swz1 = 0;
+ GLuint neg0 = 0, neg1 = 0;
+ GLuint mask = 0;
- reg[0] = reg[1] = reg[2] = 0;
+ /* Translate 3-bit-per-element swizzle into two 2-bit swizzles,
+ * one from the source register the other from a constant
+ * {0,0,0,1}.
+ */
+ for (i = 0; i < 4; i++) {
+ GLuint swzelt = GET_SWZ(inst->SrcReg[0].Swizzle, i);
+ if (swzelt >= SWIZZLE_ZERO) {
+ neg0 |= inst->SrcReg[0].Negate & (1<<i);
+ if (swzelt == SWIZZLE_ONE)
+ swz0 |= SWIZZLE_W << (i*2);
+ else if (i < SWIZZLE_W)
+ swz0 |= i << (i*2);
+ }
+ else {
+ mask |= 1<<i;
+ neg1 |= inst->SrcReg[0].Negate & (1<<i);
+ swz1 |= swzelt << (i*2);
+ }
+ }
- for (i = 0; i < info->nr_args; i++)
- reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
+ result = cvp_choose_result( cp, &inst->DstReg, &fixup );
+ reg[0].file = FILE_REG;
+ reg[0].idx = REG_ID;
+ reg[1] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
- cp->csr->dword = 0;
- cp->csr->vec.opcode = inst->Opcode;
- cp->csr->vec.arg0 = reg[0];
- cp->csr->vec.arg1 = reg[1];
- cp->csr->vec.arg2 = reg[2];
- cp->csr->vec.dst = result;
- cp->csr++;
+ /* mask == XYZW: every channel comes from the source register;
+ * mask == 0: every channel comes from the constant.
+ * NOTE(review): the two branches below pass the opposite
+ * register/swizzle pair (reg[0]/swz0 is the constant, reg[1]/swz1
+ * the source) - looks swapped, confirm before relying on it.
+ */
+ if (mask == WRITEMASK_XYZW) {
+ cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE);
+
+ }
+ else if (mask == 0) {
+ cvp_emit_rsw(cp, result, reg[1], neg1, swz1, GL_TRUE);
+ }
+ else {
+ reg[0] = cvp_emit_rsw(cp, REG_ARG0, reg[0], neg0, swz0, GL_FALSE);
+ reg[1] = cvp_emit_rsw(cp, REG_ARG1, reg[1], neg1, swz1, GL_FALSE);
+
+ assert(reg[0].file == FILE_REG);
+
+ op = cvp_next_instruction(cp);
+ op->sel.opcode = SEL;
+ op->sel.dst = result;
+ op->sel.idx0 = reg[0].idx;
+ op->sel.file1 = reg[1].file;
+ op->sel.idx1 = reg[1].idx;
+ op->sel.mask = mask;
+ }
if (result == REG_RES) {
- cp->csr->dword = fixup.dword;
- cp->csr++;
- }
+ op = cvp_next_instruction(cp);
+ op->dword = fixup.dword;
+ }
break;
-
-
- case NOP:
+ }
+ case VP_OPCODE_PRINT:
+ case VP_OPCODE_END:
break;
default:
- assert(0);
+ result = cvp_choose_result( cp, &inst->DstReg, &fixup );
+ for (i = 0; i < info->nr_args; i++)
+ reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
+
+ op = cvp_next_instruction(cp);
+ op->alu.opcode = inst->Opcode;
+ op->alu.file0 = reg[0].file;
+ op->alu.idx0 = reg[0].idx;
+ op->alu.file1 = reg[1].file;
+ op->alu.idx1 = reg[1].idx;
+ op->alu.dst = result;
+
+ if (result == REG_RES) {
+ op = cvp_next_instruction(cp);
+ op->dword = fixup.dword;
+ }
break;
}
-
- cvp_release_regs( cp );
}
@@ -1254,7 +1162,7 @@ static void compile_vertex_program( struct arb_vp_machine *m,
if (DISASSEM) {
for (i = 0; i < m->nr_instructions; i++) {
union instruction insn = m->instructions[i];
- const struct opcode_info *info = &opcode_info[insn.vec.opcode];
+ const struct opcode_info *info = &opcode_info[insn.alu.opcode];
info->print( insn, info );
}
_mesa_printf("\n\n");
@@ -1390,15 +1298,62 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
if (program->Parameters) {
_mesa_load_state_parameters(ctx, program->Parameters);
- m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues;
}
+
+
+ /* Initialize regs where necessary:
+ */
+ ASSIGN_4V(m->reg[REG_ID], 0, 0, 0, 1);
+
+ m->nr_inputs = m->nr_outputs = 0;
+
+ for (i = 0; i < 16; i++) {
+ if (program->InputsRead & (1<<i)) {
+ GLuint j = m->nr_inputs++;
+ m->input[j].idx = i;
+ m->input[j].data = m->VB->AttribPtr[i]->data;
+ m->input[j].stride = m->VB->AttribPtr[i]->stride;
+ m->input[j].size = m->VB->AttribPtr[i]->size;
+ ASSIGN_4V(m->reg[REG_IN0 + i], 0, 0, 0, 1);
+ }
+ }
+
+ for (i = 0; i < 15; i++) {
+ if (program->OutputsWritten & (1<<i)) {
+ GLuint j = m->nr_outputs++;
+ m->output[j].idx = i;
+ m->output[j].data = m->attribs[i].data;
+ }
+ }
+
/* Run the actual program:
*/
for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
+ for (j = 0; j < m->nr_inputs; j++) {
+ GLuint idx = REG_IN0 + m->input[j].idx;
+ switch (m->input[j].size) {
+ case 4: m->reg[idx][3] = m->input[j].data[3];
+ case 3: m->reg[idx][2] = m->input[j].data[2];
+ case 2: m->reg[idx][1] = m->input[j].data[1];
+ case 1: m->reg[idx][0] = m->input[j].data[0];
+ }
+
+ STRIDE_F(m->input[j].data, m->input[j].stride);
+ }
+
for (j = 0; j < m->nr_instructions; j++) {
union instruction inst = m->instructions[j];
- opcode_info[inst.vec.opcode].func( m, inst );
+ opcode_func[inst.alu.opcode]( m, inst );
+ }
+
+ for (j = 0; j < m->nr_outputs; j++) {
+ GLuint idx = REG_OUT0 + m->output[j].idx;
+ m->output[j].data[0] = m->reg[idx][0];
+ m->output[j].data[1] = m->reg[idx][1];
+ m->output[j].data[2] = m->reg[idx][2];
+ m->output[j].data[3] = m->reg[idx][3];
+ m->output[j].data += 4;
}
}
@@ -1488,9 +1443,10 @@ validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
/* Grab the state GL state and put into registers:
*/
- m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams;
- m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters;
- m->File[PROGRAM_STATE_VAR] = 0;
+ m->File[FILE_REG] = m->reg;
+ m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams;
+ m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters;
+ m->File[FILE_STATE_PARAM] = program->Parameters->ParameterValues;
}
}