summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c')
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 412
1 files changed, 199 insertions, 213 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 93a516105e..1b2cb8dde7 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -22,13 +22,13 @@
#include "radeon_compiler.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
-
-#include "shader/prog_print.h"
+#include "radeon_swizzle.h"
/*
@@ -42,103 +42,83 @@
t_swizzle(y), \
t_swizzle(y), \
t_src_class(vpi->SrcReg[x].File), \
- NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
-static unsigned long t_dst_mask(GLuint mask)
+static unsigned long t_dst_mask(unsigned int mask)
{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & WRITEMASK_XYZW;
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
}
-static unsigned long t_dst_class(gl_register_file file)
+static unsigned long t_dst_class(rc_register_file file)
{
-
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
+ case RC_FILE_OUTPUT:
return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
+ case RC_FILE_ADDRESS:
return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT)
+ if (dst->File == RC_FILE_OUTPUT)
return vp->outputs[dst->Index];
return dst->Index;
}
-static unsigned long t_src_class(gl_register_file file)
+static unsigned long t_src_class(rc_register_file file)
{
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
+ case RC_FILE_INPUT:
return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
+ case RC_FILE_CONSTANT:
return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
-static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
{
unsigned long aclass = t_src_class(a.File);
unsigned long bclass = t_src_class(b.File);
if (aclass != bclass)
- return GL_FALSE;
+ return 0;
if (aclass == PVS_SRC_REG_TEMPORARY)
- return GL_FALSE;
+ return 0;
if (a.RelAddr || b.RelAddr)
- return GL_TRUE;
+ return 1;
if (a.Index != b.Index)
- return GL_TRUE;
+ return 1;
- return GL_FALSE;
+ return 0;
}
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
+static inline unsigned long t_swizzle(unsigned int swizzle)
{
- /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
}
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- if (src->File == PROGRAM_INPUT) {
+ if (src->File == RC_FILE_INPUT) {
assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
@@ -154,9 +134,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp,
/* these two functions should probably be merged... */
static unsigned long t_src(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -169,9 +149,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp,
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -180,79 +160,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
- src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(src->RelAddr << 4);
}
-static GLboolean valid_dst(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
assert(dst->Index == 0);
}
- return GL_TRUE;
+ return 1;
}
static void ei_vector1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_vector2(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
}
static void ei_math1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_lit(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -262,27 +242,27 @@ static void ei_lit(struct r300_vertex_program_code *vp,
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
}
static void ei_mad(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
/* Remarks about hardware limitations of MAD
* (please preserve this comment, as this information is _NOT_
@@ -310,22 +290,22 @@ static void ei_mad(struct r300_vertex_program_code *vp,
* according to AMD docs, this should improve performance by one clock
* as a nice side bonus.
*/
- if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
+ 0,
+ 1,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
} else {
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -336,17 +316,17 @@ static void ei_mad(struct r300_vertex_program_code *vp,
}
static void ei_pow(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
@@ -361,8 +341,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction *vpi = &rci->I;
- GLuint *inst = compiler->code->body.d + compiler->code->length;
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
/* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg))
@@ -374,26 +354,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
switch (vpi->Opcode) {
- case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
- case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
- case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
- case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
- case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
- case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
- case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
- case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
- case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
- case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
- case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
- case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
- case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
- case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
- case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
- case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
default:
rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
return;
@@ -407,38 +387,37 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
struct temporary_allocation {
- GLuint Allocated:1;
- GLuint HwTemp:15;
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
struct rc_instruction * LastRead;
};
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
- GLuint num_orig_temps = 0;
- GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ unsigned int num_orig_temps = 0;
+ char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
struct temporary_allocation * ta;
- GLuint i, j;
+ unsigned int i, j;
compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
/* Pass 1: Count original temporaries and allocate structures */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- if (inst->I.SrcReg[i].Index >= num_orig_temps)
- num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- if (inst->I.DstReg.Index >= num_orig_temps)
- num_orig_temps = inst->I.DstReg.Index + 1;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
}
}
}
@@ -449,32 +428,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
- ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
}
}
/* Pass 3: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.SrcReg[i].Index;
- inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ hwtemps[ta[orig].HwTemp] = 0;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.DstReg.Index;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
@@ -484,16 +462,16 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
if (j >= VSF_MAX_FRAGMENT_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
- ta[orig].Allocated = GL_TRUE;
+ ta[orig].Allocated = 1;
ta[orig].HwTemp = j;
- hwtemps[j] = GL_TRUE;
+ hwtemps[j] = 1;
if (j >= compiler->code->num_temporaries)
compiler->code->num_temporaries = j + 1;
}
}
- inst->I.DstReg.Index = ta[orig].HwTemp;
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
}
}
}
@@ -504,45 +482,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
* Vertex engine cannot read two inputs or two constants at the same time.
* Introduce intermediate MOVs to temporary registers to account for this.
*/
-static GLboolean transform_source_conflicts(
+static int transform_source_conflicts(
struct radeon_compiler *c,
struct rc_instruction* inst,
void* unused)
{
- GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (num_operands == 3) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
- || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
-
- reset_srcreg(&inst->I.SrcReg[2]);
- inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[2].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
}
}
- if (num_operands >= 2) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
-
- reset_srcreg(&inst->I.SrcReg[1]);
- inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[1].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
}
}
- return GL_TRUE;
+ return 1;
}
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
@@ -553,44 +531,52 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
if ((compiler->RequiredOutputs & (1 << i)) &&
!(compiler->Base.Program.OutputsWritten & (1 << i))) {
struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = i;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
- inst->I.SrcReg[0].Index = 0;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
compiler->Base.Program.OutputsWritten |= 1 << i;
}
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
- for(i = 0; i < VERT_RESULT_MAX; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
- return GL_TRUE;
+ return 1;
}
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
addArtificialOutputs(compiler);
{
@@ -623,22 +609,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fflush(stderr);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after deadcode:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ rc_dataflow_swizzles(&compiler->Base);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ allocate_temporary_registers(compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after dataflow:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
}
translate_vertex_program(compiler);