summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/r300_vertprog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_vertprog.c')
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c777
1 files changed, 518 insertions, 259 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 146daa367c..de32013032 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -32,12 +32,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
+#include "shader/programopt.h"
#include "shader/prog_instruction.h"
+#include "shader/prog_optimize.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
+#include "radeon_nqssadce.h"
#include "r300_context.h"
+#include "r300_state.h"
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
@@ -64,20 +69,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
- vp->native = GL_FALSE; \
+ vp->error = GL_TRUE; \
} \
u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
} while (0)
-int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp, float *dst)
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
{
int pi;
- struct gl_vertex_program *mesa_vp = &vp->mesa_program;
float *dst_o = dst;
struct gl_program_parameter_list *paramList;
- if (mesa_vp->IsNVProgram) {
+ if (vp->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
@@ -89,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
return dst - dst_o;
}
- assert(mesa_vp->Base.Parameters);
- _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+ if (!vp->Base.Parameters)
+ return 0;
- if (mesa_vp->Base.Parameters->NumParameters * 4 >
+ _mesa_load_state_parameters(ctx, vp->Base.Parameters);
+
+ if (vp->Base.Parameters->NumParameters * 4 >
VSF_MAX_FRAGMENT_LENGTH) {
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
- paramList = mesa_vp->Base.Parameters;
+ paramList = vp->Base.Parameters;
for (pi = 0; pi < paramList->NumParameters; pi++) {
switch (paramList->Parameters[pi].Type) {
case PROGRAM_STATE_VAR:
@@ -214,21 +219,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
static unsigned long t_src_index(struct r300_vertex_program *vp,
struct prog_src_register *src)
{
- int i;
- int max_reg = -1;
-
if (src->File == PROGRAM_INPUT) {
- if (vp->inputs[src->Index] != -1)
- return vp->inputs[src->Index];
-
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- if (vp->inputs[i] > max_reg)
- max_reg = vp->inputs[i];
-
- vp->inputs[src->Index] = max_reg + 1;
-
- //vp_dump_inputs(vp, __FUNCTION__);
-
+ assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
if (src->Index < 0) {
@@ -943,60 +935,80 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
static void t_inputs_outputs(struct r300_vertex_program *vp)
{
int i;
- int cur_reg = 0;
+ int cur_reg;
+ GLuint OutputsWritten, InputsRead;
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- vp->inputs[i] = -1;
+ OutputsWritten = vp->Base->Base.OutputsWritten;
+ InputsRead = vp->Base->Base.InputsRead;
+
+ cur_reg = -1;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i))
+ vp->inputs[i] = ++cur_reg;
+ else
+ vp->inputs[i] = -1;
+ }
+ cur_reg = 0;
for (i = 0; i < VERT_RESULT_MAX; i++)
vp->outputs[i] = -1;
- assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
- vp->outputs[VERT_RESULT_COL1] =
- vp->outputs[VERT_RESULT_COL0] + 1;
- cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- vp->outputs[VERT_RESULT_BFC0] =
- vp->outputs[VERT_RESULT_COL0] + 2;
- cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- vp->outputs[VERT_RESULT_BFC1] =
- vp->outputs[VERT_RESULT_COL0] + 3;
- cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (vp->key.OutputsWritten & (1 << i)) {
+ if (OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
}
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
- struct prog_instruction *vpi)
+void r300TranslateVertexShader(struct r300_vertex_program *vp)
{
+ struct prog_instruction *vpi = vp->Base->Base.Instructions;
int i;
GLuint *inst;
unsigned long num_operands;
@@ -1007,14 +1019,13 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
struct prog_src_register src[3];
vp->pos_end = 0; /* Not supported yet */
- vp->program.length = 0;
- /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
+ vp->hw_code.length = 0;
vp->translated = GL_TRUE;
- vp->native = GL_TRUE;
+ vp->error = GL_FALSE;
t_inputs_outputs(vp);
- for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
+ for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
vpi++, inst += 4) {
FREE_TEMPS();
@@ -1176,293 +1187,541 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
&u_temp_i);
break;
default:
- assert(0);
+ vp->error = GL_TRUE;
break;
}
}
- /* Some outputs may be artificially added, to match the inputs
- of the fragment program. Blank the outputs here. */
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (vp->key.OutputsAdded & (1 << i)) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- vp->outputs[i],
- VSF_FLAG_ALL,
- PVS_DST_REG_OUT);
- inst[1] = __CONST(0, SWIZZLE_ZERO);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
- }
- }
-
- vp->program.length = (inst - vp->program.body.i);
- if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
- vp->program.length = 0;
- vp->native = GL_FALSE;
+ vp->hw_code.length = (inst - vp->hw_code.body.d);
+ if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
+ vp->error = GL_TRUE;
}
-#if 0
- fprintf(stderr, "hw program:\n");
- for (i = 0; i < vp->program.length; i++)
- fprintf(stderr, "%08x\n", vp->program.body.d[i]);
-#endif
}
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
+
+ vpi = &prog->Instructions[prog->NumInstructions - 3];
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_HPOS;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
-static void position_invariant(struct gl_program *prog)
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_END;
+}
+
+static void pos_as_texcoord(struct gl_program *prog, int tex_id)
{
struct prog_instruction *vpi;
- struct gl_program_parameter_list *paramList;
- int i;
+ GLuint tempregi = prog->NumTemporaries;
- gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+ prog->NumTemporaries++;
- /* tokens[4] = matrix modifier */
-#ifdef PREFER_DP4
- tokens[4] = 0; /* not transposed or inverted */
-#else
- tokens[4] = STATE_MATRIX_TRANSPOSE;
-#endif
- paramList = prog->Parameters;
-
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
- _mesa_init_instructions(vpi, prog->NumInstructions + 4);
-
- for (i = 0; i < 4; i++) {
- GLint idx;
- tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
- idx = _mesa_add_state_reference(paramList, tokens);
-#ifdef PREFER_DP4
- vpi[i].Opcode = OPCODE_DP4;
- vpi[i].StringPos = 0;
- vpi[i].Data = 0;
-
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- vpi[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi[i].DstReg.WriteMask = 1 << i;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
-#else
- if (i == 0)
- vpi[i].Opcode = OPCODE_MUL;
- else
- vpi[i].Opcode = OPCODE_MAD;
+ for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = tempregi;
+ }
+ }
- vpi[i].Data = 0;
+ insert_wpos(prog, tempregi, tex_id);
- if (i == 3)
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- else
- vpi[i].DstReg.File = PROGRAM_TEMPORARY;
- vpi[i].DstReg.Index = 0;
- vpi[i].DstReg.WriteMask = 0xf;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
-
- if (i > 0) {
- vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
- vpi[i].SrcReg[2].Index = 0;
- vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
+
+/**
+ * The fogcoord attribute is special in that only the first component
+ * is relevant, and the remaining components are always fixed (when read
+ * from by the fragment program) to yield an X001 pattern.
+ *
+ * We need to enforce this either in the vertex program or in the fragment
+ * program, and this code chooses not to enforce it in the vertex program.
+ * This is slightly cheaper, as long as the fragment program does not use
+ * weird swizzles.
+ *
+ * And it seems that usually, weird swizzles are not used, so...
+ *
+ * See also the counterpart rewriting for fragment programs.
+ */
+static void fog_as_texcoord(struct gl_program *prog, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ vpi = prog->Instructions;
+ while (vpi->Opcode != OPCODE_END) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_X;
}
-#endif
+
+ ++vpi;
}
- _mesa_copy_instructions(&vpi[i], prog->Instructions,
- prog->NumInstructions);
+ prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
- free(prog->Instructions);
+static int translateABS(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
- prog->Instructions = vpi;
+ inst = &prog->Instructions[pos];
- prog->NumInstructions += 4;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst->Opcode = OPCODE_MAX;
+ inst->SrcReg[1] = inst->SrcReg[0];
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
- assert(vpi->Opcode == OPCODE_END);
+ return 0;
}
-static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
- GLuint temp_index)
+static int translateDP3(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- struct prog_instruction *vpi_insert;
- int i = 0;
+ struct prog_instruction *inst;
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
- _mesa_init_instructions(vpi, prog->NumInstructions + 2);
- /* all but END */
- _mesa_copy_instructions(vpi, prog->Instructions,
- prog->NumInstructions - 1);
- /* END */
- _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
- &prog->Instructions[prog->NumInstructions - 1],
- 1);
- vpi_insert = &vpi[prog->NumInstructions - 1];
+ inst = &prog->Instructions[pos];
- vpi_insert[i].Opcode = OPCODE_MOV;
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ return 0;
+}
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+static int translateDPH(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
- vpi_insert[i].Opcode = OPCODE_MOV;
+ inst = &prog->Instructions[pos];
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ return 0;
+}
+
+static int translateFLR(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ struct prog_dst_register dst;
+ int tmp_idx;
- free(prog->Instructions);
+ tmp_idx = prog->NumTemporaries++;
- prog->Instructions = vpi;
+ _mesa_insert_instructions(prog, pos + 1, 1);
- prog->NumInstructions += i;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst = &prog->Instructions[pos];
+ dst = inst->DstReg;
- assert(vpi->Opcode == OPCODE_END);
+ inst->Opcode = OPCODE_FRC;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ ++inst;
+
+ inst->Opcode = OPCODE_ADD;
+ inst->DstReg = dst;
+ inst->SrcReg[0] = (inst-1)->SrcReg[0];
+ inst->SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[1].Index = tmp_idx;
+ inst->SrcReg[1].Negate = NEGATE_XYZW;
+
+ return 1;
}
-static void pos_as_texcoord(struct r300_vertex_program *vp,
- struct gl_program *prog)
+static int translateSUB(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
- /* should do something else if no temps left... */
- prog->NumTemporaries++;
+ struct prog_instruction *inst;
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT
- && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_ADD;
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+
+ return 0;
+}
+
+static int translateSWZ(struct gl_program *prog, int pos)
+{
+ prog->Instructions[pos].Opcode = OPCODE_MOV;
+
+ return 0;
+}
+
+static int translateXPD(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ int tmp_idx;
+
+ tmp_idx = prog->NumTemporaries++;
+
+ _mesa_insert_instructions(prog, pos + 1, 1);
+
+ inst = &prog->Instructions[pos];
+
+ *(inst+1) = *inst;
+
+ inst->Opcode = OPCODE_MUL;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ ++inst;
+
+ inst->Opcode = OPCODE_MAD;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[2].Index = tmp_idx;
+
+ return 1;
+}
+
+static void translateInsts(struct gl_program *prog)
+{
+ struct prog_instruction *inst;
+ int i;
+
+ for (i = 0; i < prog->NumInstructions; ++i) {
+ inst = &prog->Instructions[i];
+
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ i += translateABS(prog, i);
+ break;
+ case OPCODE_DP3:
+ i += translateDP3(prog, i);
+ break;
+ case OPCODE_DPH:
+ i += translateDPH(prog, i);
+ break;
+ case OPCODE_FLR:
+ i += translateFLR(prog, i);
+ break;
+ case OPCODE_SUB:
+ i += translateSUB(prog, i);
+ break;
+ case OPCODE_SWZ:
+ i += translateSWZ(prog, i);
+ break;
+ case OPCODE_XPD:
+ i += translateXPD(prog, i);
+ break;
+ default:
+ break;
}
}
- insert_wpos(vp, prog, tempregi);
}
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key
- *wanted_key, struct gl_vertex_program
- *mesa_vp, GLint wpos_idx)
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
+ OutputsAdded |= 1 << (vp_result); \
+ count++; \
+ } \
+ } while (0)
+
+static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
{
- struct r300_vertex_program *vp;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint OutputsAdded, FpReads;
+ int i, count;
- vp = _mesa_calloc(sizeof(*vp));
- _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- vp->wpos_idx = wpos_idx;
+ OutputsAdded = 0;
+ count = 0;
+ FpReads = r300->selected_fp->Base->InputsRead;
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i < 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
+ }
+
+ /* Some outputs may be artificially added, to match the inputs of the fragment program.
+ * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
+ * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
+ */
+ if (count > 0) {
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
+ inst = &prog->Instructions[prog->NumInstructions - 1 - count];
+
+ for (i = 0; i < VERT_RESULT_MAX; ++i) {
+ if (OutputsAdded & (1 << i)) {
+ inst->Opcode = OPCODE_MOV;
+
+ inst->DstReg.File = PROGRAM_OUTPUT;
+ inst->DstReg.Index = i;
+ inst->DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->DstReg.CondMask = COND_TR;
+
+ inst->SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->SrcReg[0].Index = 0;
+ inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
- if (mesa_vp->IsPositionInvariant) {
- position_invariant(&mesa_vp->Base);
+ ++inst;
+ }
+ }
+
+ prog->OutputsWritten |= OutputsAdded;
}
+}
+
+#undef ADD_OUTPUT
+
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
+ GLuint fp_reads;
+
+ fp_reads = r300->selected_fp->Base->InputsRead;
+ {
+ if (fp_reads & FRAG_BIT_COL0) {
+ s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
+ }
- if (wpos_idx > -1) {
- pos_as_texcoord(vp, &mesa_vp->Base);
+ if (fp_reads & FRAG_BIT_COL1) {
+ s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+ }
}
- assert(mesa_vp->Base.NumInstructions);
- vp->num_temporaries = mesa_vp->Base.NumTemporaries;
- r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
+ {
+ int i;
+ for (i = 0; i < 8; ++i) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
+ }
+ }
+ }
- return vp;
+ s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
+ if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
-static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
{
- if (key->OutputsWritten & (1 << vert))
- return;
+ (void) opcode;
+ (void) reg;
- key->OutputsWritten |= 1 << vert;
- key->OutputsAdded |= 1 << vert;
+ return GL_TRUE;
}
-void r300SelectVertexShader(r300ContextPtr r300)
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+ struct r300_vertex_program_key *wanted_key,
+ const struct gl_vertex_program *mesa_vp)
{
- GLcontext *ctx = ctx = r300->radeon.glCtx;
- GLuint InputsRead;
- struct r300_vertex_program_key wanted_key = { 0 };
- GLint i;
- struct r300_vertex_program_cont *vpc;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
- GLint wpos_idx;
+ struct gl_program *prog;
- vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
- wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-
- wpos_idx = -1;
- if (InputsRead & FRAG_BIT_WPOS) {
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
- break;
+ vp = _mesa_calloc(sizeof(*vp));
+ vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
+ _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- if (i == ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tno free texcoord found\n");
- _mesa_exit(-1);
- }
+ prog = &vp->Base->Base;
- wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- wpos_idx = i;
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
}
- add_outputs(&wanted_key, VERT_RESULT_HPOS);
+ if (vp->Base->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, vp->Base);
+ }
+
+ if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0);
+ }
- if (InputsRead & FRAG_BIT_COL0) {
- add_outputs(&wanted_key, VERT_RESULT_COL0);
+ if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0);
}
- if (InputsRead & FRAG_BIT_COL1) {
- add_outputs(&wanted_key, VERT_RESULT_COL1);
+ addArtificialOutputs(ctx, prog);
+
+ translateInsts(prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
}
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i)) {
- add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(ctx, prog, &nqssadce);
+
+ /* We need this step for reusing temporary registers */
+ _mesa_optimize_program(ctx, prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
}
}
- if (vpc->mesa_program.IsPositionInvariant) {
- /* we wan't position don't we ? */
- wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+ assert(prog->NumInstructions);
+ {
+ struct prog_instruction *inst;
+ int max, i, tmp;
+
+ inst = prog->Instructions;
+ max = -1;
+ while (inst->Opcode != OPCODE_END) {
+ tmp = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < tmp; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if ((int) inst->SrcReg[i].Index > max) {
+ max = inst->SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (_mesa_num_inst_dst_regs(inst->Opcode)) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if ((int) inst->DstReg.Index > max) {
+ max = inst->DstReg.Index;
+ }
+ }
+ }
+ ++inst;
+ }
+
+ /* We actually want highest index of used temporary register,
+ * not the number of temporaries used.
+ * These values aren't always the same.
+ */
+ vp->num_temporaries = max + 1;
}
- for (vp = vpc->progs; vp; vp = vp->next)
+ return vp;
+}
+
+struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_cont *vpc;
+ struct r300_vertex_program *vp;
+
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+ wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+ for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
== 0) {
- r300->selected_vp = vp;
- return;
+ return r300->selected_vp = vp;
}
- //_mesa_print_program(&vpc->mesa_program.Base);
+ }
- vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
+ vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
vp->next = vpc->progs;
vpc->progs = vp;
- r300->selected_vp = vp;
+
+ return r300->selected_vp = vp;
+}
+
+#define bump_vpu_count(ptr, new_count) do { \
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
+ } while(0)
+
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
+{
+ int i;
+
+ assert((code->length > 0) && (code->length % 4 == 0));
+
+ switch ((dest >> 8) & 0xf) {
+ case 0:
+ R300_STATECHANGE(r300, vpi);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 2:
+ R300_STATECHANGE(r300, vpp);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 4:
+ R300_STATECHANGE(r300, vps);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ default:
+ fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+ _mesa_exit(-1);
+ }
+}
+
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r300_vertex_program *prog = rmesa->selected_vp;
+ int inst_count = 0;
+ int param_count = 0;
+
+ /* Reset state, in case we don't use something */
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+ R300_STATECHANGE(rmesa, vpp);
+ param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+ param_count /= 4;
+
+ r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
+ inst_count = (prog->hw_code.length / 4) - 1;
+
+ r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
+ _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
+
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
+
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
}