From 84445273ed554ea6fa65c894bbe098eb3f3d1230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 18:40:41 +0200 Subject: r300: Move vertex program compilation to compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just the first step of refactoring. The separation is not yet clean enough with this commit. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 1533 ++++++++++++++++++++ 1 file changed, 1533 insertions(+) create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..b074c98ee9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1533 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" + +#include "shader/prog_optimize.h" +#include "shader/prog_print.h" + + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ + t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ + (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ + t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(src[x].File), \ + NEGATE_NONE) | (src[x].RelAddr << 4)) + + + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; +} + +static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = + PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + PVS_SRC_SELECT_FORCE_1, + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZ : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(*u_temp_i, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, + /* Not 100% sure about this */ + (!src[0]. + Negate) ? NEGATE_XYZW : NEGATE_NONE); + inst[3] = __CONST(0, SWIZZLE_ZERO); + (*u_temp_i)--; + + return inst; +} + +static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 0 + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#else + inst[0] = + PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ONE); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); +#endif + + return inst; +} + +static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + */ + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = + PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, NEGATE_NONE); + + (*u_temp_i)--; + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) +{ + int i; + int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = glvp->OutputsWritten; + InputsRead = glvp->InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + + cur_reg = 0; + for (i = 0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (OutputsWritten & (1 << i)) { + vp->outputs[i] = cur_reg++; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct prog_instruction *vpi = compiler->program->Instructions; + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + struct r300_vertex_program_code * vp = compiler->code; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + t_inputs_outputs(compiler->code, compiler->program); + + for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + + { + int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; + if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used); + return GL_FALSE; + } + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; + } + + if (!valid_dst(compiler->code, &vpi->DstReg)) { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); + + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { + src[i] = vpi->SrcReg[i]; + } + + if (num_operands == 3) { /* TODO: scalars */ + if (CMP_SRCS(src[1], src[2]) + || CMP_SRCS(src[0], src[2])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[2].File), + NEGATE_NONE) | (src[2]. + RelAddr << + 4); + inst[2] = __CONST(2, SWIZZLE_ZERO); + inst[3] = __CONST(2, SWIZZLE_ZERO); + inst += 4; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if (num_operands >= 2) { + if (CMP_SRCS(src[1], src[0])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[0].File), + NEGATE_NONE) | (src[0]. + RelAddr << + 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + switch (vpi->Opcode) { + case OPCODE_ABS: + inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src); + break; + case OPCODE_ADD: + inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src); + break; + case OPCODE_ARL: + inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src); + break; + case OPCODE_DP3: + inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src); + break; + case OPCODE_DP4: + inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src); + break; + case OPCODE_DPH: + inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src); + break; + case OPCODE_DST: + inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src); + break; + case OPCODE_EX2: + inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src); + break; + case OPCODE_EXP: + inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src); + break; + case OPCODE_FLR: + inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src); + break; + case OPCODE_LG2: + inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src); + break; + case OPCODE_LIT: + inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src); + break; + case OPCODE_LOG: + inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src); + break; + case OPCODE_MAD: + inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src); + break; + case OPCODE_MAX: + inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src); + break; + case OPCODE_MIN: + inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src); + break; + case OPCODE_MOV: + inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src); + break; + case OPCODE_MUL: + inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src); + break; + case OPCODE_POW: + inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src); + break; + case OPCODE_RCP: + inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src); + break; + case OPCODE_SGE: + inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src); + break; + case OPCODE_SLT: + inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src); + break; + case OPCODE_SUB: + inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src); + break; + case OPCODE_XPD: + inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + default: + return GL_FALSE; + } + } + + compiler->code->length = (inst - compiler->code->body.d); + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + return GL_FALSE; + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) +{ + struct prog_instruction *vpi; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); + + vpi = &prog->Instructions[prog->NumInstructions - 3]; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; +} + +static void pos_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + + prog->NumTemporaries++; + + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + + insert_wpos(prog, tempregi, tex_id); + + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. + */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_X; + } + + ++vpi; + } + + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateDP3(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + return 0; +} + +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + + return 0; +} + +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + dst = inst->DstReg; + + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; + + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; + + return 1; +} + +static int translateSUB(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateSWZ(struct gl_program *prog, int pos) +{ + prog->Instructions[pos].Opcode = OPCODE_MOV; + + return 0; +} + +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; + } + } +} + +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + GLuint OutputsAdded, FpReads; + int i, count; + + OutputsAdded = 0; + count = 0; + FpReads = compiler->state.FpReads; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count); + inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + + compiler->program->OutputsWritten |= OutputsAdded; + } +} + +#undef ADD_OUTPUT + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + GLuint fp_reads; + + fp_reads = compiler->state.FpReads; + { + if (fp_reads & FRAG_BIT_COL0) { + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; + } + + if (fp_reads & FRAG_BIT_COL1) { + s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + } + } + + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; + } + } + } + + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx) +{ + GLboolean success; + + if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); + } + + if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0); + } + + addArtificialOutputs(compiler); + + translateInsts(compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(compiler->program, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + } + + assert(compiler->program->NumInstructions); + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = compiler->program->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; + } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + compiler->code->num_temporaries = max + 1; + } + + success = translate_vertex_program(compiler); + + compiler->code->InputsRead = compiler->program->InputsRead; + compiler->code->OutputsWritten = compiler->program->OutputsWritten; + + return success; +} -- cgit v1.2.3 From 127ca61fa34497e69149360201ae97f87cb9f38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 19:25:06 +0200 Subject: r300/vertprog: Use generic transforms and throw away unneeded code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 443 +-------------------- 1 file changed, 8 insertions(+), 435 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index b074c98ee9..c7fc2617de 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -25,6 +25,8 @@ #include "../r300_reg.h" #include "radeon_nqssadce.h" +#include "radeon_program.h" +#include "radeon_program_alu.h" #include "shader/prog_optimize.h" #include "shader/prog_print.h" @@ -186,34 +188,6 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp, return GL_TRUE; } -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, @@ -250,40 +224,6 @@ static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, @@ -302,32 +242,6 @@ static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZ : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, @@ -382,47 +296,6 @@ static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? NEGATE_XYZW : NEGATE_NONE); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, @@ -707,139 +580,6 @@ static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? NEGATE_XYZW : NEGATE_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? NEGATE_XYZW : NEGATE_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} - -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? NEGATE_XYZW : NEGATE_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, NEGATE_NONE); - - (*u_temp_i)--; - - return inst; -} - static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) { int i; @@ -1017,24 +757,15 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * } switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src); - break; case OPCODE_ADD: inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src); break; case OPCODE_ARL: inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src); break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src); - break; case OPCODE_DP4: inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src); break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src); - break; case OPCODE_DST: inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src); break; @@ -1044,10 +775,6 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * case OPCODE_EXP: inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src); break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; case OPCODE_FRC: inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src); break; @@ -1090,16 +817,6 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * case OPCODE_SLT: inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src); break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; default: return GL_FALSE; } @@ -1201,155 +918,6 @@ static void fog_as_texcoord(struct gl_program *prog, int tex_id) prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); } -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) -{ - struct prog_instruction *inst; - int i; - - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } - } -} #define ADD_OUTPUT(fp_attr, vp_result) \ do { \ @@ -1464,7 +1032,12 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi addArtificialOutputs(compiler); - translateInsts(compiler->program); + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + }; + radeonLocalTransform(compiler->program, 1, transformations); + } if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after native rewrite:\n"); -- cgit v1.2.3 From 86e3334333d1de7fd723221155de9c8c1d0ce1c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 19:52:00 +0200 Subject: r300/vertprog: Massively reduce code duplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 383 +++------------------ 1 file changed, 42 insertions(+), 341 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index c7fc2617de..ff6575b303 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -188,30 +188,13 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp, return GL_TRUE; } -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) +static GLuint * ei_vector1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) { - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_FALSE, GL_FALSE, t_dst_index(vp, &vpi->DstReg), @@ -224,30 +207,13 @@ static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) +static GLuint * ei_vector2(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) { - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_FALSE, GL_FALSE, t_dst_index(vp, &vpi->DstReg), @@ -260,30 +226,13 @@ static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) +static GLuint *ei_math1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) { - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_TRUE, GL_FALSE, t_dst_index(vp, &vpi->DstReg), @@ -296,52 +245,7 @@ static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp, +static GLuint *ei_lit(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, struct prog_src_register src[3]) @@ -380,25 +284,7 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp, +static GLuint *ei_mad(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, struct prog_src_register src[3]) @@ -416,81 +302,7 @@ static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp, +static GLuint *ei_pow(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst, struct prog_src_register src[3]) @@ -508,78 +320,6 @@ static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp, return inst; } -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) { int i; @@ -757,67 +497,28 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * } switch (vpi->Opcode) { - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src); - break; + case OPCODE_ADD: inst = ei_vector2(compiler->code, VE_ADD, vpi, inst, src); break; + case OPCODE_ARL: inst = ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst, src); break; + case OPCODE_DP4: inst = ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst, src); break; + case OPCODE_DST: inst = ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst, src); break; + case OPCODE_EX2: inst = ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst, src); break; + case OPCODE_EXP: inst = ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst, src); break; + case OPCODE_FRC: inst = ei_vector1(compiler->code, VE_FRACTION, vpi, inst, src); break; + case OPCODE_LG2: inst = ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst, src); break; + case OPCODE_LIT: inst = ei_lit(compiler->code, vpi, inst, src); break; + case OPCODE_LOG: inst = ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst, src); break; + case OPCODE_MAD: inst = ei_mad(compiler->code, vpi, inst, src); break; + case OPCODE_MAX: inst = ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst, src); break; + case OPCODE_MIN: inst = ei_vector2(compiler->code, VE_MINIMUM, vpi, inst, src); break; + case OPCODE_MOV: inst = ei_vector1(compiler->code, VE_ADD, vpi, inst, src); break; + case OPCODE_MUL: inst = ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst, src); break; + case OPCODE_POW: inst = ei_pow(compiler->code, vpi, inst, src); break; + case OPCODE_RCP: inst = ei_math1(compiler->code, ME_RECIP_DX, vpi, inst, src); break; + case OPCODE_RSQ: inst = ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst, src); break; + case OPCODE_SGE: inst = ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst, src); break; + case OPCODE_SLT: inst = ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst, src); break; default: + fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode); return GL_FALSE; } } -- cgit v1.2.3 From d65404225d8ba2c16eaffac833cb7dcfd2722a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 20:24:22 +0200 Subject: r300/vertprog: Cleanup source conflict handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 325 ++++++++++----------- 1 file changed, 149 insertions(+), 176 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index ff6575b303..400408620e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -32,27 +32,18 @@ #include "shader/prog_print.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. */ #define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ t_swizzle(y), \ t_swizzle(y), \ t_swizzle(y), \ t_swizzle(y), \ - t_src_class(src[x].File), \ - NEGATE_NONE) | (src[x].RelAddr << 4)) - - + t_src_class(vpi->SrcReg[x].File), \ + NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) static unsigned long t_dst_mask(GLuint mask) @@ -121,6 +112,24 @@ static unsigned long t_src_class(gl_register_file file) } } +static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return GL_FALSE; + if (aclass == PVS_SRC_REG_TEMPORARY) + return GL_FALSE; + + if (a.RelAddr || b.RelAddr) + return GL_TRUE; + if (a.Index != b.Index) + return GL_TRUE; + + return GL_FALSE; +} + static INLINE unsigned long t_swizzle(GLubyte swizzle) { /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ @@ -188,11 +197,10 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp, return GL_TRUE; } -static GLuint * ei_vector1(struct r300_vertex_program_code *vp, +static void ei_vector1(struct r300_vertex_program_code *vp, GLuint hw_opcode, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_FALSE, @@ -200,18 +208,15 @@ static GLuint * ei_vector1(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); + inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; } -static GLuint * ei_vector2(struct r300_vertex_program_code *vp, +static void ei_vector2(struct r300_vertex_program_code *vp, GLuint hw_opcode, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_FALSE, @@ -219,18 +224,15 @@ static GLuint * ei_vector2(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; } -static GLuint *ei_math1(struct r300_vertex_program_code *vp, +static void ei_math1(struct r300_vertex_program_code *vp, GLuint hw_opcode, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, GL_TRUE, @@ -238,17 +240,14 @@ static GLuint *ei_math1(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; } -static GLuint *ei_lit(struct r300_vertex_program_code *vp, +static void ei_lit(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} @@ -259,35 +258,32 @@ static GLuint *ei_lit(struct r300_vertex_program_code *vp, t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | - (src[0].RelAddr << 4); - - return inst; + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); } -static GLuint *ei_mad(struct r300_vertex_program_code *vp, +static void ei_mad(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, GL_FALSE, @@ -295,17 +291,14 @@ static GLuint *ei_mad(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); } -static GLuint *ei_pow(struct r300_vertex_program_code *vp, +static void ei_pow(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) + GLuint * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, GL_TRUE, @@ -313,11 +306,9 @@ static GLuint *ei_pow(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) @@ -397,15 +388,7 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler) { struct prog_instruction *vpi = compiler->program->Instructions; - int i; GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - struct r300_vertex_program_code * vp = compiler->code; compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -414,109 +397,33 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END; vpi++, inst += 4) { - - { - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; - if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { - fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used); - return GL_FALSE; - } - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; - } - + /* Skip instructions writing to non-existing destination */ if (!valid_dst(compiler->code, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - WRITEMASK_XYZW, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - NEGATE_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - WRITEMASK_XYZW, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - NEGATE_NONE) | (src[0]. - RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } + inst -= 4; + continue; } switch (vpi->Opcode) { - case OPCODE_ADD: inst = ei_vector2(compiler->code, VE_ADD, vpi, inst, src); break; - case OPCODE_ARL: inst = ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst, src); break; - case OPCODE_DP4: inst = ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst, src); break; - case OPCODE_DST: inst = ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst, src); break; - case OPCODE_EX2: inst = ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst, src); break; - case OPCODE_EXP: inst = ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst, src); break; - case OPCODE_FRC: inst = ei_vector1(compiler->code, VE_FRACTION, vpi, inst, src); break; - case OPCODE_LG2: inst = ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst, src); break; - case OPCODE_LIT: inst = ei_lit(compiler->code, vpi, inst, src); break; - case OPCODE_LOG: inst = ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst, src); break; - case OPCODE_MAD: inst = ei_mad(compiler->code, vpi, inst, src); break; - case OPCODE_MAX: inst = ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst, src); break; - case OPCODE_MIN: inst = ei_vector2(compiler->code, VE_MINIMUM, vpi, inst, src); break; - case OPCODE_MOV: inst = ei_vector1(compiler->code, VE_ADD, vpi, inst, src); break; - case OPCODE_MUL: inst = ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst, src); break; - case OPCODE_POW: inst = ei_pow(compiler->code, vpi, inst, src); break; - case OPCODE_RCP: inst = ei_math1(compiler->code, ME_RECIP_DX, vpi, inst, src); break; - case OPCODE_RSQ: inst = ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst, src); break; - case OPCODE_SGE: inst = ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst, src); break; - case OPCODE_SLT: inst = ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst, src); break; + case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode); return GL_FALSE; @@ -531,6 +438,55 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * return GL_TRUE; } +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static GLboolean transform_source_conflicts( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, + void* unused) +{ + struct prog_instruction inst = *orig_inst; + struct prog_instruction * dst; + GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode); + + if (num_operands == 3) { + if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2]) + || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) { + int tmpreg = radeonFindFreeTemporary(t); + struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1); + inst_mov->Opcode = OPCODE_MOV; + inst_mov->DstReg.File = PROGRAM_TEMPORARY; + inst_mov->DstReg.Index = tmpreg; + inst_mov->SrcReg[0] = inst.SrcReg[2]; + + reset_srcreg(&inst.SrcReg[2]); + inst.SrcReg[2].File = PROGRAM_TEMPORARY; + inst.SrcReg[2].Index = tmpreg; + } + } + + if (num_operands >= 2) { + if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) { + int tmpreg = radeonFindFreeTemporary(t); + struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1); + inst_mov->Opcode = OPCODE_MOV; + inst_mov->DstReg.File = PROGRAM_TEMPORARY; + inst_mov->DstReg.Index = tmpreg; + inst_mov->SrcReg[0] = inst.SrcReg[1]; + + reset_srcreg(&inst.SrcReg[1]); + inst.SrcReg[1].File = PROGRAM_TEMPORARY; + inst.SrcReg[1].Index = tmpreg; + } + } + + dst = radeonAppendInstructions(t->Program, 1); + *dst = inst; + return GL_TRUE; +} + static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) { struct prog_instruction *vpi; @@ -746,6 +702,23 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi fflush(stdout); } + { + /* Note: This pass has to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * will not be treated properly. + */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(compiler->program, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after source conflict resolve:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + { struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadceInit, -- cgit v1.2.3 From c5cb9a337881229f1db462632fa1d64e2677f316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 21:10:37 +0200 Subject: r300: Remove dependency on GLcontext from compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately, this does cause some code duplication (which we can hopefully eliminate eventually). Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 131 +++++++++++++++------ 1 file changed, 96 insertions(+), 35 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 400408620e..a0e081fe6f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -28,7 +28,6 @@ #include "radeon_program.h" #include "radeon_program_alu.h" -#include "shader/prog_optimize.h" #include "shader/prog_print.h" @@ -438,6 +437,100 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * return GL_TRUE; } +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct prog_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct prog_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index >= num_orig_temps) + num_orig_temps = inst->DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->SrcReg[i].Index; + inst->SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + /** * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. @@ -675,7 +768,7 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx) +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { GLboolean success; @@ -728,7 +821,7 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi radeonNqssaDce(compiler->program, &nqssadce, compiler); /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, compiler->program); + allocate_temporary_registers(compiler); if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after NQSSADCE:\n"); @@ -738,38 +831,6 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi } assert(compiler->program->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = compiler->program->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } - - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } - - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. - */ - compiler->code->num_temporaries = max + 1; - } success = translate_vertex_program(compiler); -- cgit v1.2.3 From 8bcb6ef786dc41049b56e6efeca0f5788cfefe5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 21:38:28 +0200 Subject: r300/compiler: Lay groundwork for better error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 37 ++++++++++------------ 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index a0e081fe6f..9edff6b039 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -384,22 +384,25 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog } } -static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler) +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) { struct prog_instruction *vpi = compiler->program->Instructions; - GLuint *inst; compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; t_inputs_outputs(compiler->code, compiler->program); - for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { + for (; vpi->Opcode != OPCODE_END; vpi++) { + GLuint *inst = compiler->code->body.d + compiler->code->length; + /* Skip instructions writing to non-existing destination */ - if (!valid_dst(compiler->code, &vpi->DstReg)) { - inst -= 4; + if (!valid_dst(compiler->code, &vpi->DstReg)) continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; } switch (vpi->Opcode) { @@ -424,17 +427,15 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: - fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode); - return GL_FALSE; + rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); + return; } - } - compiler->code->length = (inst - compiler->code->body.d); - if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { - return GL_FALSE; - } + compiler->code->length += 4; - return GL_TRUE; + if (compiler->Base.Error) + return; + } } struct temporary_allocation { @@ -768,10 +769,8 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { - GLboolean success; - if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); } @@ -832,10 +831,8 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi assert(compiler->program->NumInstructions); - success = translate_vertex_program(compiler); + translate_vertex_program(compiler); compiler->code->InputsRead = compiler->program->InputsRead; compiler->code->OutputsWritten = compiler->program->OutputsWritten; - - return success; } -- cgit v1.2.3 From a898e7d66c834be6b6e964e85cbbdf73e93300e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 22:09:48 +0200 Subject: r300/compiler: Refactor for rc_program usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 59 +++++++++++----------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 9edff6b039..743fc20597 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -386,14 +386,15 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) { - struct prog_instruction *vpi = compiler->program->Instructions; + struct rc_instruction *rci; compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; t_inputs_outputs(compiler->code, compiler->program); - for (; vpi->Opcode != OPCODE_END; vpi++) { + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction *vpi = &rci->I; GLuint *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ @@ -441,12 +442,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi struct temporary_allocation { GLuint Allocated:1; GLuint HwTemp:15; - struct prog_instruction * LastRead; + struct rc_instruction * LastRead; }; static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { - struct prog_instruction *inst; + struct rc_instruction *inst; GLuint num_orig_temps = 0; GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; @@ -456,21 +457,21 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ - for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); for (i = 0; i < numsrcs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if (inst->SrcReg[i].Index >= num_orig_temps) - num_orig_temps = inst->SrcReg[i].Index + 1; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->I.SrcReg[i].Index + 1; } } if (numdsts) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if (inst->DstReg.Index >= num_orig_temps) - num_orig_temps = inst->DstReg.Index + 1; + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (inst->I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->I.DstReg.Index + 1; } } } @@ -480,24 +481,24 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); /* Pass 2: Determine original temporary lifetimes */ - for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); for (i = 0; i < numsrcs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) - ta[inst->SrcReg[i].Index].LastRead = inst; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ - for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); for (i = 0; i < numsrcs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - GLuint orig = inst->SrcReg[i].Index; - inst->SrcReg[i].Index = ta[orig].HwTemp; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.SrcReg[i].Index; + inst->I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) hwtemps[ta[orig].HwTemp] = GL_FALSE; @@ -505,8 +506,8 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } if (numdsts) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - GLuint orig = inst->DstReg.Index; + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -525,7 +526,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } - inst->DstReg.Index = ta[orig].HwTemp; + inst->I.DstReg.Index = ta[orig].HwTemp; } } } @@ -819,18 +820,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) }; radeonNqssaDce(compiler->program, &nqssadce, compiler); + rc_mesa_to_rc_program(&compiler->Base, compiler->program); + /* We need this step for reusing temporary registers */ allocate_temporary_registers(compiler); if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(compiler->program); + rc_print_program(&compiler->Base.Program); fflush(stdout); } } - assert(compiler->program->NumInstructions); - translate_vertex_program(compiler); compiler->code->InputsRead = compiler->program->InputsRead; -- cgit v1.2.3 From 92f7a599c7e94b0687d02efef1890e1a8ed2f9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 22:49:31 +0200 Subject: r300/compiler: Refactor nqssadce to use rc_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 743fc20597..cdcfa1d27e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -310,35 +310,35 @@ static void ei_pow(struct r300_vertex_program_code *vp, inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } -static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; int cur_reg; GLuint OutputsWritten, InputsRead; - OutputsWritten = glvp->OutputsWritten; - InputsRead = glvp->InputsRead; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; + c->code->inputs[i] = ++cur_reg; else - vp->inputs[i] = -1; + c->code->inputs[i] = -1; } cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } /* If we're writing back facing colors we need to send @@ -348,39 +348,39 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog * get written into appropriate output vectors. */ if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + c->code->outputs[i] = cur_reg++; } } if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } } @@ -391,7 +391,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; - t_inputs_outputs(compiler->code, compiler->program); + t_inputs_outputs(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { struct prog_instruction *vpi = &rci->I; @@ -756,7 +756,7 @@ static void nqssadceInit(struct nqssadce_state* s) } s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (s->Compiler->Program.OutputsWritten & (1 << VERT_RESULT_PSIZ)) s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; } @@ -812,15 +812,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) fflush(stdout); } + rc_mesa_to_rc_program(&compiler->Base, compiler->program); + { struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadceInit, .IsNativeSwizzle = &swizzleIsNative, .BuildSwizzle = NULL }; - radeonNqssaDce(compiler->program, &nqssadce, compiler); - - rc_mesa_to_rc_program(&compiler->Base, compiler->program); + radeonNqssaDce(&compiler->Base, &nqssadce, compiler); /* We need this step for reusing temporary registers */ allocate_temporary_registers(compiler); @@ -834,6 +834,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) translate_vertex_program(compiler); - compiler->code->InputsRead = compiler->program->InputsRead; - compiler->code->OutputsWritten = compiler->program->OutputsWritten; + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; } -- cgit v1.2.3 From 800f48258623f8caf25d013f44784edb7caa3f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 24 Jul 2009 22:41:14 +0200 Subject: r300: Allow compiler to add constants in a cleaner way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding constants is used in a number of non-native instruction rewrites, and it required us to keep copies of modified gl_programs around. This is a first step towards ending this. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index cdcfa1d27e..c9d0996d44 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -834,6 +834,8 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) translate_vertex_program(compiler); + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + compiler->code->InputsRead = compiler->Base.Program.InputsRead; compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; } -- cgit v1.2.3 From 6f4608f53c7ba28b5640974fc1daf6ad860df2f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 24 Jul 2009 22:34:44 +0200 Subject: r300/compiler: Refactor local transforms to use rc_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 68 ++++++++++------------ 1 file changed, 32 insertions(+), 36 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index c9d0996d44..90910c45c6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -538,47 +538,43 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c * Introduce intermediate MOVs to temporary registers to account for this. */ static GLboolean transform_source_conflicts( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, + struct radeon_compiler *c, + struct rc_instruction* inst, void* unused) { - struct prog_instruction inst = *orig_inst; - struct prog_instruction * dst; - GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode); + GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); if (num_operands == 3) { - if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2]) - || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) { - int tmpreg = radeonFindFreeTemporary(t); - struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1); - inst_mov->Opcode = OPCODE_MOV; - inst_mov->DstReg.File = PROGRAM_TEMPORARY; - inst_mov->DstReg.Index = tmpreg; - inst_mov->SrcReg[0] = inst.SrcReg[2]; - - reset_srcreg(&inst.SrcReg[2]); - inst.SrcReg[2].File = PROGRAM_TEMPORARY; - inst.SrcReg[2].Index = tmpreg; + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) + || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; + + reset_srcreg(&inst->I.SrcReg[2]); + inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].Index = tmpreg; } } if (num_operands >= 2) { - if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) { - int tmpreg = radeonFindFreeTemporary(t); - struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1); - inst_mov->Opcode = OPCODE_MOV; - inst_mov->DstReg.File = PROGRAM_TEMPORARY; - inst_mov->DstReg.Index = tmpreg; - inst_mov->SrcReg[0] = inst.SrcReg[1]; - - reset_srcreg(&inst.SrcReg[1]); - inst.SrcReg[1].File = PROGRAM_TEMPORARY; - inst.SrcReg[1].Index = tmpreg; + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; + + reset_srcreg(&inst->I.SrcReg[1]); + inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].Index = tmpreg; } } - dst = radeonAppendInstructions(t->Program, 1); - *dst = inst; return GL_TRUE; } @@ -782,16 +778,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) addArtificialOutputs(compiler); + rc_mesa_to_rc_program(&compiler->Base, compiler->program); + { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, }; - radeonLocalTransform(compiler->program, 1, transformations); + radeonLocalTransform(&compiler->Base, 1, transformations); } if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(compiler->program); + rc_print_program(&compiler->Base.Program); fflush(stdout); } @@ -803,17 +801,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) struct radeon_program_transformation transformations[] = { { &transform_source_conflicts, 0 }, }; - radeonLocalTransform(compiler->program, 1, transformations); + radeonLocalTransform(&compiler->Base, 1, transformations); } if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after source conflict resolve:\n"); - _mesa_print_program(compiler->program); + rc_print_program(&compiler->Base.Program); fflush(stdout); } - rc_mesa_to_rc_program(&compiler->Base, compiler->program); - { struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadceInit, -- cgit v1.2.3 From a1e8992ffa4e7bddb4aaeb567f9e2023ae08540e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 25 Jul 2009 00:07:46 +0200 Subject: r300/vertprog: Refactor addArtificialOutputs to use rc_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 95 +++++----------------- 1 file changed, 22 insertions(+), 73 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 90910c45c6..53e62ae2f3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -667,93 +667,42 @@ static void fog_as_texcoord(struct gl_program *prog, int tex_id) } -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) { - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = compiler->state.FpReads; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count); - inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count]; + int i; - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = i; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].Index = 0; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; - ++inst; - } + compiler->Base.Program.OutputsWritten |= 1 << i; } - - compiler->program->OutputsWritten |= OutputsAdded; } } -#undef ADD_OUTPUT - static void nqssadceInit(struct nqssadce_state* s) { struct r300_vertex_program_compiler * compiler = s->UserData; - GLuint fp_reads; - - fp_reads = compiler->state.FpReads; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } + int i; - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + for(i = 0; i < VERT_RESULT_MAX; ++i) { + if (compiler->RequiredOutputs & (1 << i)) { + if (i != VERT_RESULT_PSIZ) + s->Outputs[i].Sourced = WRITEMASK_XYZW; + else + s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */ } } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Compiler->Program.OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; } static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) @@ -776,10 +725,10 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0); } - addArtificialOutputs(compiler); - rc_mesa_to_rc_program(&compiler->Base, compiler->program); + addArtificialOutputs(compiler); + { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, -- cgit v1.2.3 From ce0c32e3d23641214dae9b3fed863dc163b26ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 25 Jul 2009 00:41:05 +0200 Subject: r300/vertprog: Refactor fog_as_texcoord to use rc_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 40 +++------------------- 1 file changed, 4 insertions(+), 36 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 53e62ae2f3..38ee9575a3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -634,39 +634,6 @@ static void pos_as_texcoord(struct gl_program *prog, int tex_id) prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); } -/** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. - * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. - */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - - static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) { int i; @@ -721,12 +688,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); } + rc_mesa_to_rc_program(&compiler->Base, compiler->program); + compiler->program = 0; + if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0); + rc_move_output(&compiler->Base, VERT_RESULT_FOGC, compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } - rc_mesa_to_rc_program(&compiler->Base, compiler->program); - addArtificialOutputs(compiler); { -- cgit v1.2.3 From 05a51f4b3dfa32c73b85b26254bf9ee270eb6be2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 25 Jul 2009 00:49:25 +0200 Subject: r300/vertprog: Refactor wpos rewrite using rc_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 70 +++------------------- 1 file changed, 9 insertions(+), 61 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 38ee9575a3..c05b488645 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -578,62 +578,6 @@ static GLboolean transform_source_conflicts( return GL_TRUE; } -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) { int i; @@ -684,15 +628,19 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { - if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); - } - rc_mesa_to_rc_program(&compiler->Base, compiler->program); compiler->program = 0; + if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler->Base, + VERT_RESULT_HPOS, + compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); + } + if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { - rc_move_output(&compiler->Base, VERT_RESULT_FOGC, compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); + rc_move_output(&compiler->Base, + VERT_RESULT_FOGC, + compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } addArtificialOutputs(compiler); -- cgit v1.2.3 From d6a304800b2385740f3b90efab45564e1e6203b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 25 Jul 2009 00:50:53 +0200 Subject: r300: Remove ugly PSIZ hack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of setting Sourced, we simply force writemasks to begin with. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index c05b488645..14dd36354d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -607,12 +607,8 @@ static void nqssadceInit(struct nqssadce_state* s) int i; for(i = 0; i < VERT_RESULT_MAX; ++i) { - if (compiler->RequiredOutputs & (1 << i)) { - if (i != VERT_RESULT_PSIZ) - s->Outputs[i].Sourced = WRITEMASK_XYZW; - else - s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */ - } + if (compiler->RequiredOutputs & (1 << i)) + s->Outputs[i].Sourced = WRITEMASK_XYZW; } } @@ -631,6 +627,8 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) rc_mesa_to_rc_program(&compiler->Base, compiler->program); compiler->program = 0; + rc_move_output(&compiler->Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); + if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { rc_copy_output(&compiler->Base, VERT_RESULT_HPOS, -- cgit v1.2.3 From 3f7838168781b69aea04514a57058d0aa0cc2cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Sat, 25 Jul 2009 00:59:31 +0200 Subject: r300/vertprog: Move Mesa-dependent input/output handling out of compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 92 +--------------------- 1 file changed, 1 insertion(+), 91 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 14dd36354d..339be414cf 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -310,79 +310,6 @@ static void ei_pow(struct r300_vertex_program_code *vp, inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } -static void t_inputs_outputs(struct r300_vertex_program_compiler * c) -{ - int i; - int cur_reg; - GLuint OutputsWritten, InputsRead; - - OutputsWritten = c->Base.Program.OutputsWritten; - InputsRead = c->Base.Program.InputsRead; - - cur_reg = -1; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) - c->code->inputs[i] = ++cur_reg; - else - c->code->inputs[i] = -1; - } - - cur_reg = 0; - for (i = 0; i < VERT_RESULT_MAX; i++) - c->code->outputs[i] = -1; - - assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; - } - - /* If we're writing back facing colors we need to send - * four colors to make front/back face colors selection work. - * If the vertex program doesn't write all 4 colors, lets - * pretend it does by skipping output index reg so the colors - * get written into appropriate output vectors. - */ - if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - c->code->outputs[VERT_RESULT_COL0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - c->code->outputs[VERT_RESULT_COL1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - cur_reg++; - } - - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (OutputsWritten & (1 << i)) { - c->code->outputs[i] = cur_reg++; - } - } - - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) { @@ -391,7 +318,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; - t_inputs_outputs(compiler); + compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { struct prog_instruction *vpi = &rci->I; @@ -624,23 +551,6 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { - rc_mesa_to_rc_program(&compiler->Base, compiler->program); - compiler->program = 0; - - rc_move_output(&compiler->Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - - if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { - rc_copy_output(&compiler->Base, - VERT_RESULT_HPOS, - compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); - } - - if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { - rc_move_output(&compiler->Base, - VERT_RESULT_FOGC, - compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); - } - addArtificialOutputs(compiler); { -- cgit v1.2.3 From 59fff53492fb385f8567c163aed014fdd9c0f8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Mon, 27 Jul 2009 19:29:21 +0200 Subject: r300/compiler: Add vertex program code dumper from Gallium driver --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 339be414cf..fc9c8f805a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -607,4 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) compiler->code->InputsRead = compiler->Base.Program.InputsRead; compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + printf("Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } } -- cgit v1.2.3