summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-05 22:21:24 +0200
committer Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-06 09:59:43 +0200
commit 62bccd6df0c963a14e801bcac95dc8046b978a7f (patch)
tree bef7225f252c272272cc445c24c1935967554d37
parent 77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 (diff)
r300: Allow adding parameters during fragprog transform, share LIT code
-rw-r--r-- src/mesa/drivers/dri/r300/r300_context.h 24
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog.c 18
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog_emit.c 143
-rw-r--r-- src/mesa/drivers/dri/r300/r300_state.c 41
-rw-r--r-- src/mesa/drivers/dri/r300/r500_fragprog.c 12
-rw-r--r-- src/mesa/drivers/dri/r300/r500_fragprog_emit.c 112
-rw-r--r-- src/mesa/drivers/dri/r300/radeon_program_alu.c 124
7 files changed, 216 insertions, 258 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index a24ab0cad7..a69beba9a7 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -716,14 +716,11 @@ struct r300_fragment_program_code {
int tex_offset;
int tex_end;
- /* Hardware constants.
- * Contains a pointer to the value. The destination of the pointer
- * is supposed to be updated when GL state changes.
- * Typically, this is either a pointer into
- * gl_program_parameter_list::ParameterValues, or a pointer to a
- * global constant (e.g. for sin/cos-approximation)
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
*/
- const GLfloat *constant[PFS_NUM_CONST_REGS];
+ struct prog_src_register constant[PFS_NUM_CONST_REGS];
int const_nr;
int max_temp_idx;
@@ -787,14 +784,11 @@ struct r500_fragment_program_code {
int inst_offset;
int inst_end;
- /* Hardware constants.
- * Contains a pointer to the value. The destination of the pointer
- * is supposed to be updated when GL state changes.
- * Typically, this is either a pointer into
- * gl_program_parameter_list::ParameterValues, or a pointer to a
- * global constant (e.g. for sin/cos-approximation)
- */
- const GLfloat *constant[PFS_NUM_CONST_REGS];
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[PFS_NUM_CONST_REGS];
int const_nr;
int max_temp_idx;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 6a8ef0ef5f..57987f5d0f 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -117,9 +117,7 @@ static GLboolean transform_TEX(
int factor_index;
tokens[2] = inst.TexSrcUnit;
- factor_index =
- _mesa_add_state_reference(
- compiler->fp->mesa_program.Base.Parameters, tokens);
+ factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
tgt = radeonAppendInstructions(t->Program, 1);
@@ -303,7 +301,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
i++;
/* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens);
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
@@ -401,6 +399,11 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
compiler.code = &fp->code;
compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ }
+
insert_WPOS_trailer(&compiler);
struct radeon_program_transformation transformations[] = {
@@ -413,13 +416,18 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
2, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Program after transformations:\n");
+ _mesa_printf("Fragment Program: After transformations:\n");
_mesa_print_program(compiler.program);
}
if (!r300FragmentProgramEmit(&compiler))
fp->error = GL_TRUE;
+ /* Subtle: Rescue any parameters that have been added during transformations */
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
+ fp->mesa_program.Base.Parameters = compiler.program->Parameters;
+ compiler.program->Parameters = 0;
+
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);
if (!fp->error)
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index 889631f705..d95008edc0 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -549,22 +549,17 @@ static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)
/**
* Emit a hardware constant/parameter.
- *
- * \p cp Stable pointer to an array of 4 floats.
- * The pointer must be stable in the sense that it remains to be valid
- * and hold the contents of the constant/parameter throughout the lifetime
- * of the fragment program (actually, up until the next time the fragment
- * program is translated).
*/
static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
- const GLfloat * cp)
+ struct prog_src_register srcreg)
{
COMPILE_STATE;
GLuint reg = undef;
int index;
for (index = 0; index < code->const_nr; ++index) {
- if (code->constant[index] == cp)
+ if (code->constant[index].File == srcreg.File &&
+ code->constant[index].Index == srcreg.Index)
break;
}
@@ -575,7 +570,7 @@ static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
}
code->const_nr++;
- code->constant[index] = cp;
+ code->constant[index] = srcreg;
}
REG_SET_TYPE(reg, REG_TYPE_CONST);
@@ -806,20 +801,11 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,
REG_SET_TYPE(r, REG_TYPE_INPUT);
break;
case PROGRAM_LOCAL_PARAM:
- r = emit_const4fv(cs,
- fp->mesa_program.Base.LocalParams[fpsrc.
- Index]);
- break;
case PROGRAM_ENV_PARAM:
- r = emit_const4fv(cs,
- cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]);
- break;
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_CONSTANT:
- r = emit_const4fv(cs,
- fp->mesa_program.Base.Parameters->
- ParameterValues[fpsrc.Index]);
+ r = emit_const4fv(cs, fpsrc);
break;
case PROGRAM_BUILTIN:
switch(fpsrc.Swizzle) {
@@ -1452,100 +1438,17 @@ static GLfloat SinCosConsts[2][4] = {
}
};
-/**
- * Emit a LIT instruction.
- * \p flags may be PFS_FLAG_SAT
- *
- * Definition of LIT (from ARB_fragment_program):
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- *
- * The longest path of computation is the one leading to result.z,
- * consisting of 5 operations. This implementation of LIT takes
- * 5 slots. So unless there's some special undocumented opcode,
- * this implementation is potentially optimal. Unfortunately,
- * emit_arith is a bit too conservative because it doesn't understand
- * partial writes to the vector component.
- */
-static const GLfloat LitConst[4] =
- { 127.999999, 127.999999, 127.999999, -127.999999 };
-
-static void emit_lit(struct r300_pfs_compile_state *cs,
- GLuint dest, int mask, GLuint src, int flags)
+static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)
{
- COMPILE_STATE;
- GLuint cnst;
- int needTemporary;
- GLuint temp;
-
- cnst = emit_const4fv(cs, LitConst);
-
- needTemporary = 0;
- if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
+ struct prog_src_register srcreg;
+ GLuint constant_swizzle;
- if (needTemporary) {
- temp = keep(get_temp_reg(cs));
- } else {
- temp = keep(dest);
- }
+ srcreg.File = PROGRAM_CONSTANT;
+ srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
+ SinCosConsts[i], 4, &constant_swizzle);
+ srcreg.Swizzle = constant_swizzle;
- // Note: The order of emit_arith inside the slots is relevant,
- // because emit_arith only looks at scalar vs. vector when resolving
- // dependencies, and it does not consider individual vector components,
- // so swizzling between the two parts can create fake dependencies.
-
- // First slot
- emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY,
- keep(src), pfs_zero, undef, 0);
- emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);
-
- // Second slot
- emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z,
- swizzle(temp, W, W, W, W), cnst, undef, 0);
- emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W,
- swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
-
- // Third slot
- // If desired, we saturate the y result here.
- // This does not affect the use as a condition variable in the CMP later
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W,
- temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
-
- // Fourth slot
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X,
- pfs_one, pfs_one, pfs_zero, 0);
- emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);
-
- // Fifth slot
- emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z,
- pfs_zero, swizzle(temp, W, W, W, W),
- negate(swizzle(temp, Y, Y, Y, Y)), flags);
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
- pfs_zero, 0);
-
- if (needTemporary) {
- emit_arith(cs, PFS_OP_MAD, dest, mask,
- temp, pfs_one, pfs_zero, flags);
- free_temp(cs, temp);
- } else {
- // Decrease refcount of the destination
- t_hw_dst(cs, dest, GL_FALSE, cs->nrslots);
- }
+ return emit_const4fv(cs, srcreg);
}
static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi)
@@ -1577,8 +1480,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
src[1] = t_src(cs, fpi->SrcReg[1]);
src[2] = t_src(cs, fpi->SrcReg[2]);
/* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
- * r300 - if src2.c < 0.0 ? src1.c : src0.c
- */
+ * r300 - if src2.c < 0.0 ? src1.c : src0.c
+ */
emit_arith(cs, PFS_OP_CMP, dest, mask,
src[2], src[1], src[0], flags);
break;
@@ -1592,8 +1495,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
* result = sin(x)
*/
temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* add 0.5*PI and do range reduction */
@@ -1687,10 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_LG2, dest, mask,
src[0], undef, undef, flags);
break;
- case OPCODE_LIT:
- src[0] = t_src(cs, fpi->SrcReg[0]);
- emit_lit(cs, dest, mask, src[0], flags);
- break;
case OPCODE_LRP:
src[0] = t_src(cs, fpi->SrcReg[0]);
src[1] = t_src(cs, fpi->SrcReg[1]);
@@ -1758,8 +1657,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
*/
temp[0] = get_temp_reg(cs);
temp[1] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* x = -abs(x)+0.5*PI */
@@ -1825,8 +1724,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
*/
temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* do range reduction */
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 0f7c179de8..d7a6962acc 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2453,6 +2453,27 @@ void r300UpdateShaders(r300ContextPtr rmesa)
r300UpdateStateParameters(ctx, _NEW_PROGRAM);
}
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
+ struct gl_program *program, struct prog_src_register srcreg)
+{
+ static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+
+ switch(srcreg.File) {
+ case PROGRAM_LOCAL_PARAM:
+ return program->LocalParams[srcreg.Index];
+ case PROGRAM_ENV_PARAM:
+ return ctx->FragmentProgram.Parameters[srcreg.Index];
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ return program->Parameters->ParameterValues[srcreg.Index];
+ default:
+ _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n");
+ return dummy;
+ }
+}
+
+
static void r300SetupPixelShader(r300ContextPtr rmesa)
{
GLcontext *ctx = rmesa->radeon.glCtx;
@@ -2523,10 +2544,12 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
for (i = 0; i < code->const_nr; i++) {
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(code->constant[i][0]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(code->constant[i][1]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(code->constant[i][2]);
- rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(code->constant[i][3]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ &fp->mesa_program.Base, code->constant[i]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);
}
}
@@ -2595,10 +2618,12 @@ static void r500SetupPixelShader(r300ContextPtr rmesa)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < code->const_nr; i++) {
- rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(code->constant[i][0]);
- rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(code->constant[i][1]);
- rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(code->constant[i][2]);
- rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(code->constant[i][3]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ &fp->mesa_program.Base, code->constant[i]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
}
bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 7ee8494722..1cdb065354 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -212,7 +212,7 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
i++;
/* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens);
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
@@ -332,6 +332,11 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
fp->translated = r500FragmentProgramEmit(&compiler);
+ /* Subtle: Rescue any parameters that have been added during transformations */
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
+ fp->mesa_program.Base.Parameters = compiler.program->Parameters;
+ compiler.program->Parameters = 0;
+
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
@@ -461,9 +466,8 @@ static void dump_program(struct r500_fragment_program_code *code)
if (code->const_nr) {
fprintf(stderr, "--------\nConstants:\n");
for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n,
- code->constant[n][0], code->constant[n][1], code->constant[n][2],
- code->constant[n][3]);
+ fprintf(stderr, "Constant %d: %i[%i]\n", n,
+ code->constant[n].File, code->constant[n].Index);
}
fprintf(stderr, "--------\n");
}
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
index 0e95c81e48..c79bff96bd 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
@@ -266,7 +266,7 @@ static int get_temp(struct r500_pfs_compile_state *cs, int slot) {
/* Borrowed verbatim from r300_fragprog since it hasn't changed. */
static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,
- const GLfloat * cp)
+ struct prog_src_register srcreg)
{
PROG_CODE;
@@ -274,7 +274,8 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,
int index;
for (index = 0; index < code->const_nr; ++index) {
- if (code->constant[index] == cp)
+ if (code->constant[index].File == srcreg.File &&
+ code->constant[index].Index == srcreg.Index)
break;
}
@@ -285,7 +286,7 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,
}
code->const_nr++;
- code->constant[index] = cp;
+ code->constant[index] = srcreg;
}
reg = index | REG_CONSTANT;
@@ -303,18 +304,11 @@ static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_regist
reg = cs->inputs[src.Index].reg;
break;
case PROGRAM_LOCAL_PARAM:
- reg = emit_const4fv(cs,
- cs->compiler->fp->mesa_program.Base.LocalParams[src.Index]);
- break;
case PROGRAM_ENV_PARAM:
- reg = emit_const4fv(cs,
- cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[src.Index]);
- break;
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_CONSTANT:
- reg = emit_const4fv(cs,
- cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]);
+ reg = emit_const4fv(cs, src);
break;
case PROGRAM_BUILTIN:
reg = 0x0;
@@ -628,12 +622,20 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction
temp.Index = get_temp(cs, 0);
temp.WriteMask = WRITEMASK_W;
+ struct prog_src_register srcreg;
+ GLuint constant_swizzle;
+
+ srcreg.File = PROGRAM_CONSTANT;
+ srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
+ RCP_2PI, 4, &constant_swizzle);
+ srcreg.Swizzle = constant_swizzle;
+
/* temp = Input*(1/2pi) */
ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp);
set_src0(cs, ip, fpi->SrcReg[0]);
- set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI));
+ set_src1(cs, ip, srcreg);
set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0]));
- set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W);
+ set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg));
set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
/* temp = frac(dst) */
@@ -660,87 +662,6 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction
}
}
-/**
- * Emit a LIT instruction.
- *
- * Definition of LIT (from ARB_fragment_program):
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- */
-static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi)
-{
- GLuint cnst;
- int needTemporary;
- GLuint temp;
- int ip;
-
- cnst = emit_const4fv(cs, LIT);
-
- needTemporary = 0;
- if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT)
- needTemporary = 1;
-
- if (needTemporary) {
- temp = get_temp(cs, 0);
- } else {
- temp = fpi->DstReg.Index;
- }
-
- // MAX tmp.xyw, op0, { 0, 0, 0, -128+eps }
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW);
- set_src0(cs, ip, fpi->SrcReg[0]);
- set_src1_direct(cs, ip, cnst);
- set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
- set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W);
-
- // MIN tmp.z, tmp.w, { 128-eps }
- // LG2 tmp.w, tmp.y
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW);
- set_src0_direct(cs, ip, temp);
- set_src1_direct(cs, ip, cnst);
- set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y);
- set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X);
-
- // MOV tmp.y, tmp.x
- // MUL tmp.w, tmp.z, tmp.w
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW);
- set_src0_direct(cs, ip, temp);
- set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z);
- set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W);
- set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
-
- // MOV tmp.x, 1.0
- // EX2 tmp.w, tmp.w
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW);
- set_src0_direct(cs, ip, temp);
- set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W);
- set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO);
- set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
-
- // tmp.z := (-tmp.x >= 0) ? tmp.y : 0.0
- // MOV tmp.w, 1.0
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW);
- set_src0_direct(cs, ip, temp);
- set_argA(cs, ip, 0, R500_SWIZZLE_ZERO, R500_SWIZZLE_ONE);
- set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE);
- set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO);
-
- if (needTemporary) {
- ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg);
- set_src0_direct(cs, ip, temp);
- set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W);
- set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W);
- set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
- }
-}
-
static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {
PROG_CODE;
GLuint src[3], dest = 0;
@@ -830,9 +751,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
src[0] = make_src(cs, fpi->SrcReg[0]);
emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
- case OPCODE_LIT:
- emit_lit(cs, fpi);
- break;
case OPCODE_LRP:
/* result = src0*src1 + (1-src0)*src2
* = src0*src1 + src2 + (-src0)*src2
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
index d6d016d7c1..85ea810523 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c
@@ -35,6 +35,8 @@
#include "radeon_program_alu.h"
+#include "shader/prog_parameter.h"
+
static struct prog_instruction *emit1(struct gl_program* p,
gl_inst_opcode Opcode, struct prog_dst_register DstReg,
@@ -101,6 +103,19 @@ static struct prog_dst_register dstreg(int file, int index)
return dst;
}
+static struct prog_dst_register dstregtmpmask(int index, int mask)
+{
+ struct prog_dst_register dst;
+ dst.File = PROGRAM_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
static const struct prog_src_register builtin_zero = {
.File = PROGRAM_BUILTIN,
.Index = 0,
@@ -125,6 +140,15 @@ static struct prog_src_register srcreg(int file, int index)
return src;
}
+static struct prog_src_register srcregswz(int file, int index, int swz)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
static struct prog_src_register negate(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
@@ -136,10 +160,10 @@ static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x,
{
struct prog_src_register swizzled = reg;
swizzled.Swizzle = MAKE_SWIZZLE4(
- GET_SWZ(reg.Swizzle, x),
- GET_SWZ(reg.Swizzle, y),
- GET_SWZ(reg.Swizzle, z),
- GET_SWZ(reg.Swizzle, w));
+ x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
+ y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
+ z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
+ w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
return swizzled;
}
@@ -185,6 +209,93 @@ static void transform_FLR(struct radeon_transform_context* t,
emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
}
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ static const GLfloat LitConst[4] = { -127.999999 };
+
+ GLuint constant;
+ GLuint constant_swizzle;
+ GLuint temp;
+ int needTemporary = 0;
+ struct prog_src_register srctemp;
+
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+
+ if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
+ needTemporary = 1;
+ } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
+ // LIT is typically followed by DP3/DP4, so there's no point
+ // in creating special code for this case
+ needTemporary = 1;
+ }
+
+ if (needTemporary) {
+ temp = radeonFindFreeTemporary(t);
+ } else {
+ temp = inst->DstReg.Index;
+ }
+ srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+
+ // tmp.x = max(0.0, Src.x);
+ // tmp.y = max(0.0, Src.y);
+ // tmp.w = clamp(Src.w, -128+eps, 128-eps);
+ emit2(t->Program, OPCODE_MAX,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ inst->SrcReg[0],
+ swizzle(srcreg(PROGRAM_CONSTANT, constant),
+ SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(t->Program, OPCODE_MIN,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+
+ // tmp.w = Pow(tmp.y, tmp.w)
+ emit1(t->Program, OPCODE_LG2,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit2(t->Program, OPCODE_MUL,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
+ emit1(t->Program, OPCODE_EX2,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+
+ // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
+ emit3(t->Program, OPCODE_CMP,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ builtin_zero);
+
+ // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
+ emit1(t->Program, OPCODE_MOV,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+
+ if (needTemporary)
+ emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp);
+}
+
static void transform_POW(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
@@ -249,13 +360,11 @@ static void transform_XPD(struct radeon_transform_context* t,
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD
+ * ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
* @note should be applicable to R300 and R500 fragment programs.
- *
- * @todo add LIT here as well?
*/
GLboolean radeonTransformALU(struct radeon_transform_context* t,
struct prog_instruction* inst,
@@ -265,6 +374,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;