diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_vertprog.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 777 |
1 files changed, 518 insertions, 259 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 146daa367c..de32013032 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -32,12 +32,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/enums.h" #include "shader/program.h" +#include "shader/programopt.h" #include "shader/prog_instruction.h" +#include "shader/prog_optimize.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "radeon_nqssadce.h" #include "r300_context.h" +#include "r300_state.h" /* TODO: Get rid of t_src_class call */ #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ @@ -64,20 +69,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->native = GL_FALSE; \ + vp->error = GL_TRUE; \ } \ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) -int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { int pi; - struct gl_vertex_program *mesa_vp = &vp->mesa_program; float *dst_o = dst; struct gl_program_parameter_list *paramList; - if (mesa_vp->IsNVProgram) { + if (vp->IsNVProgram) { _mesa_load_tracked_matrices(ctx); for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { @@ -89,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx, return dst - dst_o; } - assert(mesa_vp->Base.Parameters); - _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + if (!vp->Base.Parameters) + return 0; - if (mesa_vp->Base.Parameters->NumParameters * 4 > + _mesa_load_state_parameters(ctx, vp->Base.Parameters); + + if (vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH) { fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = mesa_vp->Base.Parameters; + paramList = vp->Base.Parameters; for (pi = 0; pi < paramList->NumParameters; pi++) { switch (paramList->Parameters[pi].Type) { case PROGRAM_STATE_VAR: @@ -214,21 +219,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src) { - int i; - int max_reg = -1; - if (src->File == PROGRAM_INPUT) { - if (vp->inputs[src->Index] != -1) - return vp->inputs[src->Index]; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (vp->inputs[i] > max_reg) - max_reg = vp->inputs[i]; - - vp->inputs[src->Index] = max_reg + 1; - - //vp_dump_inputs(vp, __FUNCTION__); - + assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { if (src->Index < 0) { @@ -943,60 +935,80 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, static void t_inputs_outputs(struct r300_vertex_program *vp) { int i; - int cur_reg = 0; + int cur_reg; + GLuint OutputsWritten, InputsRead; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - vp->inputs[i] = -1; + OutputsWritten = vp->Base->Base.OutputsWritten; + InputsRead = vp->Base->Base.InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) vp->outputs[i] = -1; - assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = - vp->outputs[VERT_RESULT_COL0] + 1; - cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = - vp->outputs[VERT_RESULT_COL0] + 2; - cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = - vp->outputs[VERT_RESULT_COL0] + 3; - cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (vp->key.OutputsWritten & (1 << i)) { + if (OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; } } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) +void r300TranslateVertexShader(struct r300_vertex_program *vp) { + struct prog_instruction *vpi = vp->Base->Base.Instructions; int i; GLuint *inst; unsigned long num_operands; @@ -1007,14 +1019,13 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, struct prog_src_register src[3]; vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + vp->hw_code.length = 0; vp->translated = GL_TRUE; - vp->native = GL_TRUE; + vp->error = GL_FALSE; t_inputs_outputs(vp); - for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; vpi++, inst += 4) { FREE_TEMPS(); @@ -1176,293 +1187,541 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, &u_temp_i); break; default: - assert(0); + vp->error = GL_TRUE; break; } } - /* Some outputs may be artificially added, to match the inputs - of the fragment program. Blank the outputs here. */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (vp->key.OutputsAdded & (1 << i)) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - vp->outputs[i], - VSF_FLAG_ALL, - PVS_DST_REG_OUT); - inst[1] = __CONST(0, SWIZZLE_ZERO); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - } - } - - vp->program.length = (inst - vp->program.body.i); - if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->program.length = 0; - vp->native = GL_FALSE; + vp->hw_code.length = (inst - vp->hw_code.body.d); + if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { + vp->error = GL_TRUE; } -#if 0 - fprintf(stderr, "hw program:\n"); - for (i = 0; i < vp->program.length; i++) - fprintf(stderr, "%08x\n", vp->program.body.d[i]); -#endif } -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) +{ + struct prog_instruction *vpi; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); + + vpi = &prog->Instructions[prog->NumInstructions - 3]; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; -static void position_invariant(struct gl_program *prog) + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; +} + +static void pos_as_texcoord(struct gl_program *prog, int tex_id) { struct prog_instruction *vpi; - struct gl_program_parameter_list *paramList; - int i; + GLuint tempregi = prog->NumTemporaries; - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + prog->NumTemporaries++; - /* tokens[4] = matrix modifier */ -#ifdef PREFER_DP4 - tokens[4] = 0; /* not transposed or inverted */ -#else - tokens[4] = STATE_MATRIX_TRANSPOSE; -#endif - paramList = prog->Parameters; - - vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); - _mesa_init_instructions(vpi, prog->NumInstructions + 4); - - for (i = 0; i < 4; i++) { - GLint idx; - tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ - idx = _mesa_add_state_reference(paramList, tokens); -#ifdef PREFER_DP4 - vpi[i].Opcode = OPCODE_DP4; - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - vpi[i].DstReg.File = PROGRAM_OUTPUT; - vpi[i].DstReg.Index = VERT_RESULT_HPOS; - vpi[i].DstReg.WriteMask = 1 << i; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; -#else - if (i == 0) - vpi[i].Opcode = OPCODE_MUL; - else - vpi[i].Opcode = OPCODE_MAD; + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } - vpi[i].Data = 0; + insert_wpos(prog, tempregi, tex_id); - if (i == 3) - vpi[i].DstReg.File = PROGRAM_OUTPUT; - else - vpi[i].DstReg.File = PROGRAM_TEMPORARY; - vpi[i].DstReg.Index = 0; - vpi[i].DstReg.WriteMask = 0xf; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); - - if (i > 0) { - vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; - vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. + */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_X; } -#endif + + ++vpi; } - _mesa_copy_instructions(&vpi[i], prog->Instructions, - prog->NumInstructions); + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} - free(prog->Instructions); +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; - prog->Instructions = vpi; + inst = &prog->Instructions[pos]; - prog->NumInstructions += 4; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; - assert(vpi->Opcode == OPCODE_END); + return 0; } -static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, - GLuint temp_index) +static int translateDP3(struct gl_program *prog, int pos) { - struct prog_instruction *vpi; - struct prog_instruction *vpi_insert; - int i = 0; + struct prog_instruction *inst; - vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); - _mesa_init_instructions(vpi, prog->NumInstructions + 2); - /* all but END */ - _mesa_copy_instructions(vpi, prog->Instructions, - prog->NumInstructions - 1); - /* END */ - _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); - vpi_insert = &vpi[prog->NumInstructions - 1]; + inst = &prog->Instructions[pos]; - vpi_insert[i].Opcode = OPCODE_MOV; + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + return 0; +} - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; - vpi_insert[i].Opcode = OPCODE_MOV; + inst = &prog->Instructions[pos]; - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; + return 0; +} + +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; - free(prog->Instructions); + tmp_idx = prog->NumTemporaries++; - prog->Instructions = vpi; + _mesa_insert_instructions(prog, pos + 1, 1); - prog->NumInstructions += i; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + inst = &prog->Instructions[pos]; + dst = inst->DstReg; - assert(vpi->Opcode == OPCODE_END); + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; + + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; + + return 1; } -static void pos_as_texcoord(struct r300_vertex_program *vp, - struct gl_program *prog) +static int translateSUB(struct gl_program *prog, int pos) { - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ - prog->NumTemporaries++; + struct prog_instruction *inst; - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT - && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateSWZ(struct gl_program *prog, int pos) +{ + prog->Instructions[pos].Opcode = OPCODE_MOV; + + return 0; +} + +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; } } - insert_wpos(vp, prog, tempregi); } -static struct r300_vertex_program *build_program(struct r300_vertex_program_key - *wanted_key, struct gl_vertex_program - *mesa_vp, GLint wpos_idx) +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) + +static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) { - struct r300_vertex_program *vp; + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint OutputsAdded, FpReads; + int i, count; - vp = _mesa_calloc(sizeof(*vp)); - _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; + OutputsAdded = 0; + count = 0; + FpReads = r300->selected_fp->Base->InputsRead; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); + inst = &prog->Instructions[prog->NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - if (mesa_vp->IsPositionInvariant) { - position_invariant(&mesa_vp->Base); + ++inst; + } + } + + prog->OutputsWritten |= OutputsAdded; } +} + +#undef ADD_OUTPUT + +static void nqssadceInit(struct nqssadce_state* s) +{ + r300ContextPtr r300 = R300_CONTEXT(s->Ctx); + GLuint fp_reads; + + fp_reads = r300->selected_fp->Base->InputsRead; + { + if (fp_reads & FRAG_BIT_COL0) { + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; + } - if (wpos_idx > -1) { - pos_as_texcoord(vp, &mesa_vp->Base); + if (fp_reads & FRAG_BIT_COL1) { + s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + } } - assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; + } + } + } - return vp; + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; } -static void add_outputs(struct r300_vertex_program_key *key, GLint vert) +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) { - if (key->OutputsWritten & (1 << vert)) - return; + (void) opcode; + (void) reg; - key->OutputsWritten |= 1 << vert; - key->OutputsAdded |= 1 << vert; + return GL_TRUE; } -void r300SelectVertexShader(r300ContextPtr r300) +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + const struct gl_vertex_program *mesa_vp) { - GLcontext *ctx = ctx = r300->radeon.glCtx; - GLuint InputsRead; - struct r300_vertex_program_key wanted_key = { 0 }; - GLint i; - struct r300_vertex_program_cont *vpc; + r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - GLint wpos_idx; + struct gl_program *prog; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - - wpos_idx = -1; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + vp = _mesa_calloc(sizeof(*vp)); + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found\n"); - _mesa_exit(-1); - } + prog = &vp->Base->Base; - wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - wpos_idx = i; + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(prog); + fflush(stdout); } - add_outputs(&wanted_key, VERT_RESULT_HPOS); + if (vp->Base->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); + } + + if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); + } - if (InputsRead & FRAG_BIT_COL0) { - add_outputs(&wanted_key, VERT_RESULT_COL0); + if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); } - if (InputsRead & FRAG_BIT_COL1) { - add_outputs(&wanted_key, VERT_RESULT_COL1); + addArtificialOutputs(ctx, prog); + + translateInsts(prog); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(prog); + fflush(stdout); } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(ctx, prog, &nqssadce); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, prog); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(prog); + fflush(stdout); } } - if (vpc->mesa_program.IsPositionInvariant) { - /* we wan't position don't we ? */ - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + assert(prog->NumInstructions); + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = prog->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; + } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + vp->num_temporaries = max + 1; } - for (vp = vpc->progs; vp; vp = vp->next) + return vp; +} + +struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + + for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { - r300->selected_vp = vp; - return; + return r300->selected_vp = vp; } - //_mesa_print_program(&vpc->mesa_program.Base); + } - vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); vp->next = vpc->progs; vpc->progs = vp; - r300->selected_vp = vp; + + return r300->selected_vp = vp; +} + +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) + +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +{ + int i; + + assert((code->length > 0) && (code->length % 4 == 0)); + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + _mesa_exit(-1); + } +} + +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); + inst_count = (prog->hw_code.length / 4) - 1; + + r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), + _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } |