diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog.c | 1111 |
1 files changed, 710 insertions, 401 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 32c0128eaa..8e45bd5403 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -33,7 +33,6 @@ /*TODO'S * - * - COS/SIN/SCS instructions * - Depth write, WPOS/FOGC inputs * - FogOption * - Verify results of opcodes for accuracy, I've only checked them @@ -51,18 +50,110 @@ #include "r300_fragprog.h" #include "r300_reg.h" +/* + * Usefull macros and values + */ +#define ERROR(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __func__, ##args); \ + rp->error = GL_TRUE; \ + } while(0) + #define PFS_INVAL 0xFFFFFFFF #define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs -static void dump_program(struct r300_fragment_program *rp); -static void emit_arith(struct r300_fragment_program *rp, int op, - pfs_reg_t dest, int mask, - pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, - int flags); +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_YYY 2 +#define SWIZZLE_ZZZ 3 +#define SWIZZLE_WWW 4 +#define SWIZZLE_YZX 5 +#define SWIZZLE_ZXY 6 +#define SWIZZLE_WZY 7 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define swizzle(r, x, y, z, w) do_swizzle(rp, r, \ + ((SWIZZLE_##x<<0)| \ + (SWIZZLE_##y<<3)| \ + (SWIZZLE_##z<<6)| \ + (SWIZZLE_##w<<9)), \ + 0) + +#define REG_TYPE_INPUT 0 +#define REG_TYPE_OUTPUT 1 +#define REG_TYPE_TEMP 2 +#define REG_TYPE_CONST 3 + +#define REG_TYPE_SHIFT 0 +#define REG_INDEX_SHIFT 2 +#define REG_VSWZ_SHIFT 8 +#define REG_SSWZ_SHIFT 13 +#define REG_NEGV_SHIFT 18 +#define REG_NEGS_SHIFT 19 +#define REG_ABS_SHIFT 20 +#define REG_NO_USE_SHIFT 21 +#define REG_VALID_SHIFT 22 + +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) -/*************************************** - * begin: useful data structions for fragment program generation - ***************************************/ + +/* + * Datas structures for fragment program generation + */ /* description of r300 native hw instructions */ static const struct { @@ -86,20 +177,23 @@ static const struct { { "CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL }, }; -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ - SWIZZLE_##y, \ - SWIZZLE_##z, \ - SWIZZLE_ZERO)) - -#define SLOT_VECTOR (1<<0) -#define SLOT_SCALAR (1<<3) -#define SLOT_BOTH (SLOT_VECTOR|SLOT_SCALAR) /* vector swizzles r300 can support natively, with a couple of * cases we handle specially * - * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table - **/ + * REG_VSWZ/REG_SSWZ is an index into this table + */ +#define SLOT_VECTOR (1<<0) +#define SLOT_SCALAR (1<<3) +#define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR) + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) static const struct r300_pfs_swizzle { GLuint hash; /* swizzle value this matches */ GLuint base; /* base value for hw swizzle */ @@ -117,42 +211,30 @@ static const struct r300_pfs_swizzle { { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH }, { MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, { MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, - { PFS_INVAL, R300_FPI0_ARGC_HALF, 0, 0}, + { MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, { PFS_INVAL, 0, 0, 0}, }; -#define SWIZZLE_XYZ 0 -#define SWIZZLE_XXX 1 -#define SWIZZLE_YYY 2 -#define SWIZZLE_ZZZ 3 -#define SWIZZLE_WWW 4 -#define SWIZZLE_YZX 5 -#define SWIZZLE_ZXY 6 -#define SWIZZLE_WZY 7 -#define SWIZZLE_111 8 -#define SWIZZLE_000 9 -#define SWIZZLE_HHH 10 +/* used during matching of non-native swizzles */ #define SWZ_X_MASK (7 << 0) #define SWZ_Y_MASK (7 << 3) #define SWZ_Z_MASK (7 << 6) #define SWZ_W_MASK (7 << 9) -/* used during matching of non-native swizzles */ static const struct { - GLuint hash; /* used to mask matching swizzle components */ + GLuint hash; /* used to mask matching swizzle components */ int mask; /* actual outmask */ int count; /* count of components matched */ } s_mask[] = { - { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3}, - { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2}, - { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2}, - { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2}, - { SWZ_X_MASK, 1, 1}, - { SWZ_Y_MASK, 2, 1}, - { SWZ_Z_MASK, 4, 1}, - { PFS_INVAL, PFS_INVAL, PFS_INVAL} + { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3}, + { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2}, + { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2}, + { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2}, + { SWZ_X_MASK, 1, 1}, + { SWZ_Y_MASK, 2, 1}, + { SWZ_Z_MASK, 4, 1}, + { PFS_INVAL, PFS_INVAL, PFS_INVAL} }; -/* mapping from SWIZZLE_* to r300 native values for scalar insns */ static const struct { int base; /* hw value of swizzle */ int stride; /* difference between SRC0/1/2 */ @@ -166,58 +248,51 @@ static const struct { { R300_FPI2_ARGA_ONE , 0, 0 }, { R300_FPI2_ARGA_HALF , 0, 0 } }; -#define SWIZZLE_HALF 6 /* boiler-plate reg, for convenience */ -static const pfs_reg_t undef = { - type: REG_TYPE_TEMP, - index: 0, - v_swz: SWIZZLE_XYZ, - s_swz: SWIZZLE_W, - negate_v: 0, - negate_s: 0, - absolute: 0, - no_use: GL_FALSE, - valid: GL_FALSE -}; +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE); /* constant one source */ -static const pfs_reg_t pfs_one = { - type: REG_TYPE_CONST, - index: 0, - v_swz: SWIZZLE_111, - s_swz: SWIZZLE_ONE, - valid: GL_TRUE -}; +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE); /* constant half source */ -static const pfs_reg_t pfs_half = { - type: REG_TYPE_CONST, - index: 0, - v_swz: SWIZZLE_HHH, - s_swz: SWIZZLE_HALF, - valid: GL_TRUE -}; +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE); /* constant zero source */ -static const pfs_reg_t pfs_zero = { - type: REG_TYPE_CONST, - index: 0, - v_swz: SWIZZLE_000, - s_swz: SWIZZLE_ZERO, - valid: GL_TRUE -}; - -/*************************************** - * end: data structures - ***************************************/ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE); -#define ERROR(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n",\ - __FILE__, __func__, ##args); \ - rp->error = GL_TRUE; \ -} while(0) +/* + * Common functions prototypes + */ +static void dump_program(struct r300_fragment_program *rp); +static void emit_arith(struct r300_fragment_program *rp, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, + int flags); +/* + * Helper functions prototypes + */ static int get_hw_temp(struct r300_fragment_program *rp) { COMPILE_STATE; @@ -256,263 +331,360 @@ static void free_hw_temp(struct r300_fragment_program *rp, int idx) cs->hwreg_in_use &= ~(1<<idx); } -static pfs_reg_t get_temp_reg(struct r300_fragment_program *rp) +static GLuint get_temp_reg(struct r300_fragment_program *rp) { COMPILE_STATE; - pfs_reg_t r = undef; + GLuint r = undef; + GLuint index; - r.index = ffs(~cs->temp_in_use); - if (!r.index) { + index = ffs(~cs->temp_in_use); + if (!index) { ERROR("Out of program temps\n"); return r; } - cs->temp_in_use |= (1 << --r.index); - - cs->temps[r.index].refcount = 0xFFFFFFFF; - cs->temps[r.index].reg = -1; - r.valid = GL_TRUE; + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); return r; } -static pfs_reg_t get_temp_reg_tex(struct r300_fragment_program *rp) +static GLuint get_temp_reg_tex(struct r300_fragment_program *rp) { COMPILE_STATE; - pfs_reg_t r = undef; + GLuint r = undef; + GLuint index; - r.index = ffs(~cs->temp_in_use); - if (!r.index) { + index = ffs(~cs->temp_in_use); + if (!index) { ERROR("Out of program temps\n"); return r; } - cs->temp_in_use |= (1 << --r.index); - - cs->temps[r.index].refcount = 0xFFFFFFFF; - cs->temps[r.index].reg = get_hw_temp_tex(rp); - r.valid = GL_TRUE; + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(rp); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); return r; } -static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r) +static void free_temp(struct r300_fragment_program *rp, GLuint r) { COMPILE_STATE; - if (!(cs->temp_in_use & (1<<r.index))) return; + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; - if (r.type == REG_TYPE_TEMP) { - free_hw_temp(rp, cs->temps[r.index].reg); - cs->temps[r.index].reg = -1; - cs->temp_in_use &= ~(1<<r.index); - } else if (r.type == REG_TYPE_INPUT) { - free_hw_temp(rp, cs->inputs[r.index].reg); - cs->inputs[r.index].reg = -1; + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(rp, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(rp, cs->inputs[index].reg); + cs->inputs[index].reg = -1; } } -static pfs_reg_t emit_param4fv(struct r300_fragment_program *rp, - GLfloat *values) +static GLuint emit_param4fv(struct r300_fragment_program *rp, + GLfloat *values) { - pfs_reg_t r = undef; - r.type = REG_TYPE_CONST; + GLuint r = undef; + GLuint index; int pidx; pidx = rp->param_nr++; - r.index = rp->const_nr++; - if (pidx >= PFS_NUM_CONST_REGS || r.index >= PFS_NUM_CONST_REGS) { + index = rp->const_nr++; + if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) { ERROR("Out of const/param slots!\n"); return r; } - - rp->param[pidx].idx = r.index; + + rp->param[pidx].idx = index; rp->param[pidx].values = values; rp->params_uptodate = GL_FALSE; - r.valid = GL_TRUE; + REG_SET_TYPE(r, REG_TYPE_CONST); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); return r; } -static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp) +static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp) { - pfs_reg_t r = undef; - r.type = REG_TYPE_CONST; + GLuint r = undef; + GLuint index; - r.index = rp->const_nr++; - if (r.index >= PFS_NUM_CONST_REGS) { + index = rp->const_nr++; + if (index >= PFS_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return r; } - COPY_4V(rp->constant[r.index], cp); - r.valid = GL_TRUE; + COPY_4V(rp->constant[index], cp); + + REG_SET_TYPE(r, REG_TYPE_CONST); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); return r; } -static __inline pfs_reg_t negate(pfs_reg_t r) +static inline GLuint negate(GLuint r) { - r.negate_v = 1; - r.negate_s = 1; + REG_NEGS(r); + REG_NEGV(r); return r; } /* Hack, to prevent clobbering sources used multiple times when * emulating non-native instructions */ -static __inline pfs_reg_t keep(pfs_reg_t r) +static inline GLuint keep(GLuint r) { - r.no_use = GL_TRUE; + REG_SET_NO_USE(r, GL_TRUE); return r; } -static __inline pfs_reg_t absolute(pfs_reg_t r) +static inline GLuint absolute(GLuint r) { - r.absolute = 1; + REG_ABS(r); return r; } static int swz_native(struct r300_fragment_program *rp, - pfs_reg_t src, pfs_reg_t *r, GLuint arbneg) + GLuint src, + GLuint *r, + GLuint arbneg) { - /* Native swizzle, nothing to see here */ - src.negate_s = (arbneg >> 3) & 1; + /* Native swizzle, handle negation */ + src = (src & ~REG_NEGS_MASK) | + (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); if ((arbneg & 0x7) == 0x0) { - src.negate_v = 0; + src = src & ~REG_NEGV_MASK; *r = src; } else if ((arbneg & 0x7) == 0x7) { - src.negate_v = 1; + src |= REG_NEGV_MASK; *r = src; } else { - if (!r->valid) + if (!REG_GET_VALID(*r)) *r = get_temp_reg(rp); - src.negate_v = 1; - emit_arith(rp, PFS_OP_MAD, *r, arbneg & 0x7, - keep(src), pfs_one, pfs_zero, 0); - src.negate_v = 0; - emit_arith(rp, PFS_OP_MAD, *r, + src |= REG_NEGV_MASK; + emit_arith(rp, + PFS_OP_MAD, + *r, + arbneg & 0x7, + keep(src), + pfs_one, + pfs_zero, + 0); + src = src & ~REG_NEGV_MASK; + emit_arith(rp, + PFS_OP_MAD, + *r, (arbneg ^ 0x7) | WRITEMASK_W, - src, pfs_one, pfs_zero, 0); + src, + pfs_one, + pfs_zero, + 0); } return 3; } -static int swz_emit_partial(struct r300_fragment_program *rp, pfs_reg_t src, - pfs_reg_t *r, int mask, int mc, GLuint arbneg) +static int swz_emit_partial(struct r300_fragment_program *rp, + GLuint src, + GLuint *r, + int mask, + int mc, + GLuint arbneg) { GLuint tmp; GLuint wmask = 0; - if (!r->valid) + if (!REG_GET_VALID(*r)) *r = get_temp_reg(rp); - /* A partial match, src.v_swz/mask define what parts of the - * desired swizzle we match */ + /* A partial match, VSWZ/mask define what parts of the + * desired swizzle we match + */ if (mc + s_mask[mask].count == 3) { wmask = WRITEMASK_W; - src.negate_s = (arbneg >> 3) & 1; + src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; } tmp = arbneg & s_mask[mask].mask; if (tmp) { tmp = tmp ^ s_mask[mask].mask; if (tmp) { - src.negate_v = 1; - emit_arith(rp, PFS_OP_MAD, *r, + emit_arith(rp, + PFS_OP_MAD, + *r, arbneg & s_mask[mask].mask, - keep(src), pfs_one, pfs_zero, 0); - src.negate_v = 0; - if (!wmask) src.no_use = GL_TRUE; - else src.no_use = GL_FALSE; - emit_arith(rp, PFS_OP_MAD, *r, tmp | wmask, - src, pfs_one, pfs_zero, 0); + keep(src) | REG_NEGV_MASK, + pfs_one, + pfs_zero, + 0); + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(rp, + PFS_OP_MAD, + *r, + tmp | wmask, + src, + pfs_one, + pfs_zero, + 0); } else { - src.negate_v = 1; - if (!wmask) src.no_use = GL_TRUE; - else src.no_use = GL_FALSE; - emit_arith(rp, PFS_OP_MAD, *r, + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(rp, + PFS_OP_MAD, + *r, (arbneg & s_mask[mask].mask) | wmask, - src, pfs_one, pfs_zero, 0); - src.negate_v = 0; + src | REG_NEGV_MASK, + pfs_one, + pfs_zero, + 0); } } else { - if (!wmask) src.no_use = GL_TRUE; - else src.no_use = GL_FALSE; - emit_arith(rp, PFS_OP_MAD, *r, + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(rp, PFS_OP_MAD, + *r, s_mask[mask].mask | wmask, - src, pfs_one, pfs_zero, 0); + src, + pfs_one, + pfs_zero, + 0); } return s_mask[mask].count; } -#define swizzle(r, x, y, z, w) do_swizzle(rp, r, \ - ((SWIZZLE_##x<<0)| \ - (SWIZZLE_##y<<3)| \ - (SWIZZLE_##z<<6)| \ - (SWIZZLE_##w<<9)), \ - 0) - -static pfs_reg_t do_swizzle(struct r300_fragment_program *rp, - pfs_reg_t src, GLuint arbswz, GLuint arbneg) +static GLuint do_swizzle(struct r300_fragment_program *rp, + GLuint src, + GLuint arbswz, + GLuint arbneg) { - pfs_reg_t r = undef; - + GLuint r = undef; + GLuint vswz; int c_mask = 0; - int v_matched = 0; + int v_match = 0; /* If swizzling from something without an XYZW native swizzle, * emit result to a temp, and do new swizzle from the temp. */ - if (src.v_swz != SWIZZLE_XYZ || src.s_swz != SWIZZLE_W) { - pfs_reg_t temp = get_temp_reg(rp); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW, src, pfs_one, - pfs_zero, 0); +#if 0 + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || + REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint temp = get_temp_reg(rp); + emit_arith(rp, + PFS_OP_MAD, + temp, + WRITEMASK_XYZW, + src, + pfs_one, + pfs_zero, + 0); src = temp; } - src.s_swz = GET_SWZ(arbswz, 3); +#endif + + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || + REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint vsrcswz = (v_swiz[REG_GET_VSWZ(src)].hash & (SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK)) | REG_GET_SSWZ(src) << 9; + GLint i; + + GLuint newswz = 0; + GLuint offset; + for(i=0; i < 4; ++i){ + offset = GET_SWZ(arbswz, i); + + newswz |= (offset <= 3)?GET_SWZ(vsrcswz, offset) << i*3:offset << i*3; + } + + arbswz = newswz & (SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK); + REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); + } + else + { + /* set scalar swizzling */ + REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); + } do { + vswz = REG_GET_VSWZ(src); do { -#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash) - if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) { - if (s_mask[c_mask].count == 3) - v_matched += swz_native(rp, src, &r, + int chash; + + REG_SET_VSWZ(src, vswz); + chash = v_swiz[REG_GET_VSWZ(src)].hash & + s_mask[c_mask].hash; + + if (chash == (arbswz & s_mask[c_mask].hash)) { + if (s_mask[c_mask].count == 3) { + v_match += swz_native(rp, + src, + &r, arbneg); - else - v_matched += swz_emit_partial(rp, src, - &r, - c_mask, - v_matched, - arbneg); - - if (v_matched == 3) + } else { + v_match += swz_emit_partial(rp, + src, + &r, + c_mask, + v_match, + arbneg); + } + + if (v_match == 3) return r; /* Fill with something invalid.. all 0's was * wrong before, matched SWIZZLE_X. So all - * 1's will be okay for now */ + * 1's will be okay for now + */ arbswz |= (PFS_INVAL & s_mask[c_mask].hash); } - } while(v_swiz[++src.v_swz].hash != PFS_INVAL); - src.v_swz = SWIZZLE_XYZ; + } while(v_swiz[++vswz].hash != PFS_INVAL); + REG_SET_VSWZ(src, SWIZZLE_XYZ); } while (s_mask[++c_mask].hash != PFS_INVAL); ERROR("should NEVER get here\n"); return r; } - -static pfs_reg_t t_src(struct r300_fragment_program *rp, - struct prog_src_register fpsrc) + +static GLuint t_src(struct r300_fragment_program *rp, + struct prog_src_register fpsrc) { - pfs_reg_t r = undef; + GLuint r = undef; switch (fpsrc.File) { case PROGRAM_TEMPORARY: - r.index = fpsrc.Index; - r.valid = GL_TRUE; + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); break; case PROGRAM_INPUT: - r.index = fpsrc.Index; - r.type = REG_TYPE_INPUT; - r.valid = GL_TRUE; + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: r = emit_param4fv(rp, @@ -533,13 +705,13 @@ static pfs_reg_t t_src(struct r300_fragment_program *rp, } /* no point swizzling ONE/ZERO/HALF constants... */ - if (r.v_swz < SWIZZLE_111 || r.s_swz < SWIZZLE_ZERO) + if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase); return r; } -static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp, - struct prog_src_register fpsrc) +static GLuint t_scalar_src(struct r300_fragment_program *rp, + struct prog_src_register fpsrc) { struct prog_src_register src = fpsrc; int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ @@ -549,22 +721,24 @@ static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp, return t_src(rp, src); } -static pfs_reg_t t_dst(struct r300_fragment_program *rp, - struct prog_dst_register dest) { - pfs_reg_t r = undef; +static GLuint t_dst(struct r300_fragment_program *rp, + struct prog_dst_register dest) +{ + GLuint r = undef; switch (dest.File) { case PROGRAM_TEMPORARY: - r.index = dest.Index; - r.valid = GL_TRUE; + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); return r; case PROGRAM_OUTPUT: - r.type = REG_TYPE_OUTPUT; + REG_SET_TYPE(r, REG_TYPE_OUTPUT); switch (dest.Index) { case FRAG_RESULT_COLR: case FRAG_RESULT_DEPR: - r.index = dest.Index; - r.valid = GL_TRUE; + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); return r; default: ERROR("Bad DstReg->Index 0x%x\n", dest.Index); @@ -576,66 +750,77 @@ static pfs_reg_t t_dst(struct r300_fragment_program *rp, } } -static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src, +static int t_hw_src(struct r300_fragment_program *rp, + GLuint src, GLboolean tex) { COMPILE_STATE; int idx; + int index = REG_GET_INDEX(src); - switch (src.type) { + switch(REG_GET_TYPE(src)) { case REG_TYPE_TEMP: /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results */ - if (cs->temps[src.index].reg == -1) - cs->temps[src.index].reg = get_hw_temp(rp); - idx = cs->temps[src.index].reg; + * hasn't been written to. Undefined results + */ + if (cs->temps[index].reg == -1) + cs->temps[index].reg = get_hw_temp(rp); + + idx = cs->temps[index].reg; - if (!src.no_use && (--cs->temps[src.index].refcount == 0)) + if (!REG_GET_NO_USE(src) && + (--cs->temps[index].refcount == 0)) free_temp(rp, src); break; case REG_TYPE_INPUT: - idx = cs->inputs[src.index].reg; + idx = cs->inputs[index].reg; - if (!src.no_use && (--cs->inputs[src.index].refcount == 0)) - free_hw_temp(rp, cs->inputs[src.index].reg); + if (!REG_GET_NO_USE(src) && + (--cs->inputs[index].refcount == 0)) + free_hw_temp(rp, cs->inputs[index].reg); break; case REG_TYPE_CONST: - return (src.index | SRC_CONST); + return (index | SRC_CONST); default: ERROR("Invalid type for source reg\n"); return (0 | SRC_CONST); } - if (!tex) cs->used_in_node |= (1 << idx); + if (!tex) + cs->used_in_node |= (1 << idx); return idx; } -static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest, +static int t_hw_dst(struct r300_fragment_program *rp, + GLuint dest, GLboolean tex) { COMPILE_STATE; int idx; - assert(dest.valid); + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); - switch (dest.type) { + switch(REG_GET_TYPE(dest)) { case REG_TYPE_TEMP: - if (cs->temps[dest.index].reg == -1) { - if (!tex) - cs->temps[dest.index].reg = get_hw_temp(rp); - else - cs->temps[dest.index].reg = get_hw_temp_tex(rp); + if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { + if (!tex) { + cs->temps[index].reg = get_hw_temp(rp); + } else { + cs->temps[index].reg = get_hw_temp_tex(rp); + } } - idx = cs->temps[dest.index].reg; + idx = cs->temps[index].reg; - if (!dest.no_use && (--cs->temps[dest.index].refcount == 0)) + if (!REG_GET_NO_USE(dest) && + (--cs->temps[index].refcount == 0)) free_temp(rp, dest); cs->dest_in_node |= (1 << idx); cs->used_in_node |= (1 << idx); break; case REG_TYPE_OUTPUT: - switch (dest.index) { + switch(index) { case FRAG_RESULT_COLR: rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_COLOR; break; @@ -643,17 +828,18 @@ static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest, rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_DEPTH; break; } - return dest.index; + return index; break; default: - ERROR("invalid dest reg type %d\n", dest.type); + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); return 0; } return idx; } -static void emit_nop(struct r300_fragment_program *rp, GLuint mask, +static void emit_nop(struct r300_fragment_program *rp, + GLuint mask, GLboolean sync) { COMPILE_STATE; @@ -679,8 +865,8 @@ static void emit_tex(struct r300_fragment_program *rp, int opcode) { COMPILE_STATE; - pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]); - pfs_reg_t dest = undef, rdest = undef; + GLuint coord = t_src(rp, fpi->SrcReg[0]); + GLuint dest = undef, rdest = undef; GLuint din = cs->dest_in_node, uin = cs->used_in_node; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; @@ -691,7 +877,7 @@ static void emit_tex(struct r300_fragment_program *rp, dest = t_dst(rp, fpi->DstReg); /* r300 doesn't seem to be able to do TEX->output reg */ - if (dest.type == REG_TYPE_OUTPUT) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { rdest = dest; dest = get_temp_reg_tex(rp); } @@ -703,7 +889,7 @@ static void emit_tex(struct r300_fragment_program *rp, if (uin & (1 << hwdest)) { free_hw_temp(rp, hwdest); hwdest = get_hw_temp_tex(rp); - cs->temps[dest.index].reg = hwdest; + cs->temps[REG_GET_INDEX(dest)].reg = hwdest; } } else { hwdest = 0; @@ -713,8 +899,8 @@ static void emit_tex(struct r300_fragment_program *rp, /* Indirection if source has been written in this node, or if the * dest has been read/written in this node */ - if ((coord.type != REG_TYPE_CONST && (din & (1<<hwsrc))) || - (uin & (1<<hwdest))) { + if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && + (din & (1<<hwsrc))) || (uin & (1<<hwdest))) { /* Finish off current node */ cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); @@ -754,13 +940,13 @@ static void emit_tex(struct r300_fragment_program *rp, | (opcode << R300_FPITX_OPCODE_SHIFT); cs->dest_in_node |= (1 << hwdest); - if (coord.type != REG_TYPE_CONST) + if (REG_GET_TYPE(coord) != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); rp->node[rp->cur_node].tex_end++; /* Copy from temp to output if needed */ - if (rdest.valid) { + if (REG_GET_VALID(rdest)) { emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest, pfs_one, pfs_zero, 0); free_temp(rp, dest); @@ -770,7 +956,9 @@ static void emit_tex(struct r300_fragment_program *rp, /* Add sources to FPI1/FPI3 lists. If source is already on list, * reuse the index instead of wasting a source. */ -static int add_src(struct r300_fragment_program *rp, int reg, int pos, +static int add_src(struct r300_fragment_program *rp, + int reg, + int pos, int srcmask) { COMPILE_STATE; @@ -819,9 +1007,12 @@ static int add_src(struct r300_fragment_program *rp, int reg, int pos, * It's not necessary to force the first case, but it makes disassembled * shaders easier to read. */ -static GLboolean force_same_slot(int vop, int sop, - GLboolean emit_vop, GLboolean emit_sop, - int argc, pfs_reg_t *src) +static GLboolean force_same_slot(int vop, + int sop, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, + GLuint *src) { int i; @@ -833,20 +1024,24 @@ static GLboolean force_same_slot(int vop, int sop, if (emit_vop) { for (i=0;i<argc;i++) - if (src[i].v_swz == SWIZZLE_WZY) + if (REG_GET_VSWZ(src[i]) == SWIZZLE_WZY) return GL_TRUE; } return GL_FALSE; } -static void emit_arith(struct r300_fragment_program *rp, int op, - pfs_reg_t dest, int mask, - pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, +static void emit_arith(struct r300_fragment_program *rp, + int op, + GLuint dest, + int mask, + GLuint src0, + GLuint src1, + GLuint src2, int flags) { COMPILE_STATE; - pfs_reg_t src[3] = { src0, src1, src2 }; + GLuint src[3] = { src0, src1, src2 }; int hwsrc[3], sswz[3], vswz[3]; int hwdest; GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE; @@ -863,7 +1058,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op, if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; - if (dest.type == REG_TYPE_OUTPUT && dest.index == FRAG_RESULT_DEPR) + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) emit_vop = GL_FALSE; if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) { @@ -879,12 +1075,12 @@ static void emit_arith(struct r300_fragment_program *rp, int op, */ for (i=0;i<3;i++) { if (emit_vop && - (v_swiz[src[i].v_swz].flags & SLOT_SCALAR)) { + (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) { vpos = spos = MAX2(vpos, spos); break; } if (emit_sop && - (s_swiz[src[i].s_swz].flags & SLOT_VECTOR)) { + (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) { vpos = spos = MAX2(vpos, spos); break; } @@ -908,20 +1104,22 @@ static void emit_arith(struct r300_fragment_program *rp, int op, if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) { srcpos = add_src(rp, hwsrc[i], vpos, - v_swiz[src[i].v_swz].flags); - vswz[i] = (v_swiz[src[i].v_swz].base + - (srcpos * v_swiz[src[i].v_swz].stride)) | - (src[i].negate_v ? ARG_NEG : 0) | - (src[i].absolute ? ARG_ABS : 0); + v_swiz[REG_GET_VSWZ(src[i])].flags); + vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos * + v_swiz[REG_GET_VSWZ(src[i])].stride)) | + ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | + ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); } else vswz[i] = R300_FPI0_ARGC_ZERO; if (emit_sop) { srcpos = add_src(rp, hwsrc[i], spos, - s_swiz[src[i].s_swz].flags); - sswz[i] = (s_swiz[src[i].s_swz].base + - (srcpos * s_swiz[src[i].s_swz].stride)) | - (src[i].negate_s ? ARG_NEG : 0) | - (src[i].absolute ? ARG_ABS : 0); + s_swiz[REG_GET_SSWZ(src[i])].flags); + sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos * + s_swiz[REG_GET_SSWZ(src[i])].stride)) | + ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) | + ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); } else sswz[i] = R300_FPI2_ARGA_ZERO; } hwdest = t_hw_dst(rp, dest, GL_FALSE); @@ -943,8 +1141,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op, (vswz[2] << R300_FPI0_ARG2C_SHIFT); rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; - if (dest.type == REG_TYPE_OUTPUT) { - if (dest.index == FRAG_RESULT_COLR) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { rp->alu.inst[vpos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); @@ -968,11 +1166,11 @@ static void emit_arith(struct r300_fragment_program *rp, int op, sswz[2] << R300_FPI2_ARG2A_SHIFT; if (mask & WRITEMASK_W) { - if (dest.type == REG_TYPE_OUTPUT) { - if (dest.index == FRAG_RESULT_COLR) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { rp->alu.inst[spos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT; - } else if (dest.index == FRAG_RESULT_DEPR) { + } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH; } else assert(0); } else { @@ -985,33 +1183,52 @@ static void emit_arith(struct r300_fragment_program *rp, int op, rp->alu.inst[vpos].inst2 = NOP_INST2; return; -}; +} #if 0 -static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr) +static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr) { struct gl_fragment_program *mp = &rp->mesa_program; - pfs_reg_t r = undef; + GLuint r = undef; if (!(mp->Base.InputsRead & (1<<attr))) { ERROR("Attribute %d was not provided!\n", attr); return undef; } - r.type = REG_TYPE_INPUT; - r.index = attr; - r.valid = GL_TRUE; + REG_SET_TYPE(r, REG_TYPE_INPUT); + REG_SET_INDEX(r, attr); + REG_SET_VALID(r, GL_TRUE); return r; } #endif +static void make_sin_const(struct r300_fragment_program *rp) +{ + if(rp->const_sin[0] == -1){ + GLfloat cnstv[4]; + + cnstv[0] = 1.273239545; // 4/PI + cnstv[1] =-0.405284735; // -4/(PI*PI) + cnstv[2] = 3.141592654; // PI + cnstv[3] = 0.2225; // weight + rp->const_sin[0] = emit_const4fv(rp, cnstv); + + cnstv[0] = 0.5; + cnstv[1] = -1.5; + cnstv[2] = 0.159154943; // 1/(2*PI) + cnstv[3] = 6.283185307; // 2*PI + rp->const_sin[1] = emit_const4fv(rp, cnstv); + } +} + static GLboolean parse_program(struct r300_fragment_program *rp) { struct gl_fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; - pfs_reg_t src[3], dest, temp; - pfs_reg_t cnst; + GLuint src[3], dest, temp; + GLuint cnst; int flags, mask = 0; GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0}; @@ -1058,62 +1275,71 @@ static GLboolean parse_program(struct r300_fragment_program *rp) break; case OPCODE_COS: /* - * cos using taylor serie: - * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + * cos using a parabola (see SIN): + * cos(x): + * x += PI/2 + * x = (x/(2*PI))+0.5 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) */ temp = get_temp_reg(rp); - cnstv[0] = 0.5; - cnstv[1] = 0.041666667; - cnstv[2] = 0.001388889; - cnstv[4] = 0.0; - cnst = emit_const4fv(rp, cnstv); + make_sin_const(rp); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_XYZ, - src[0], - src[0], - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_Y | WRITEMASK_Z, - temp, temp, - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_Z, - temp, - swizzle(temp, X, X, X, W), - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_XYZ, - temp, cnst, - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - pfs_one, - pfs_one, - negate(temp), - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - temp, - pfs_one, - swizzle(temp, Y, Y, Y, W), - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - temp, - pfs_one, - negate(swizzle(temp, Z, Z, Z, W)), - flags); - emit_arith(rp, PFS_OP_MAD, dest, mask, + /* add 0.5*PI and do range reduction */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + swizzle(keep(src[0]), X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, swizzle(temp, X, X, X, X), - pfs_one, + swizzle(rp->const_sin[1], Z, Z, Z, Z), + pfs_half, + 0); + + emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X, + swizzle(temp, X, X, X, X), + undef, + undef, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, + swizzle(temp, X, X, X, X), + swizzle(rp->const_sin[1], W, W, W, W), //2*PI + negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(temp, Z, Z, Z, Z), + rp->const_sin[0], pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); + + + emit_arith(rp, PFS_OP_MAD, dest, mask, + swizzle(temp, Y, Y, Y, Y), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), flags); + free_temp(rp, temp); break; case OPCODE_DP3: @@ -1167,7 +1393,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* result.x = 1.0 * result.w = src1.w */ if (mask & WRITEMASK_XW) { - src[1].v_swz = SWIZZLE_111; /* Cheat.. */ + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat*/ emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XW, src[1], pfs_one, pfs_zero, @@ -1222,7 +1448,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) * change the compare to (t.x + 0.5) > 0.5 we may * save one instruction by doing CMP -t.x */ - cnstv[0] = cnstv[1] = cnstv[2] = cnstv[4] = 0.50001; + cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001; src[0] = t_src(rp, fpi->SrcReg[0]); temp = get_temp_reg(rp); cnst = emit_const4fv(rp, cnstv); @@ -1353,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp) flags); break; case OPCODE_SCS: - ERROR("SCS not implemented\n"); + /* + * cos using a parabola (see SIN): + * cos(x): + * x += PI/2 + * x = (x/(2*PI))+0.5 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp = get_temp_reg(rp); + make_sin_const(rp); + src[0] = t_scalar_src(rp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y, + swizzle(rp->const_sin[0], Z, Z, Z, Z), + rp->const_sin[1], + swizzle(keep(src[0]), X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W, + swizzle(rp->const_sin[0], Z, Z, Z, Z), + negate(pfs_half), + swizzle(keep(src[0]), X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, + swizzle(temp, X, X, X, X), + swizzle(temp, Y, Y, Y, Y), + swizzle(temp, W, W, W, W), + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(temp, Z, Z, Z, Z), + rp->const_sin[0], + pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); + + if(mask & WRITEMASK_Y) + { + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(keep(src[0]), X, X, X, X), + rp->const_sin[0], + pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(keep(src[0]), X, X, X, X)), + swizzle(temp, X, X, X, X), + 0); + } + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), + absolute(swizzle(temp, W, W, W, W)), + negate(swizzle(temp, W, W, W, W)), + 0); + + emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X, + swizzle(temp, Z, Z, Z, Z), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, W, W, W, W), + flags); + + if(mask & WRITEMASK_Y) + { + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); + + emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y, + swizzle(temp, W, W, W, W), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), + flags); + } + free_temp(rp, temp); break; case OPCODE_SGE: src[0] = t_src(rp, fpi->SrcReg[0]); @@ -1372,68 +1684,63 @@ static GLboolean parse_program(struct r300_fragment_program *rp) break; case OPCODE_SIN: /* - * sin using taylor serie: - * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. */ + temp = get_temp_reg(rp); - cnstv[0] = 0.333333333; - cnstv[1] = 0.008333333; - cnstv[2] = 0.000198413; - cnstv[4] = 0.0; - cnst = emit_const4fv(rp, cnstv); + make_sin_const(rp); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_XYZ, - src[0], - src[0], - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_Y | WRITEMASK_Z, - temp, temp, - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_Z, - temp, - swizzle(temp, X, X, X, W), - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_XYZ, - src[0], - temp, - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_XYZ, - temp, cnst, - pfs_zero, - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - src[0], - pfs_one, - negate(temp), - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - temp, - pfs_one, - swizzle(temp, Y, Y, Y, W), - flags); - emit_arith(rp, PFS_OP_MAD, temp, - WRITEMASK_X, - temp, - pfs_one, - negate(swizzle(temp, Z, Z, Z, W)), - flags); - emit_arith(rp, PFS_OP_MAD, dest, mask, + /* do range reduction */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(rp->const_sin[1], Z, Z, Z, Z), + pfs_half, + 0); + + emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X, swizzle(temp, X, X, X, X), - pfs_one, + undef, + undef, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, + swizzle(temp, X, X, X, X), + swizzle(rp->const_sin[1], W, W, W, W), //2*PI + negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(temp, Z, Z, Z, Z), + rp->const_sin[0], pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); + + + emit_arith(rp, PFS_OP_MAD, dest, mask, + swizzle(temp, Y, Y, Y, Y), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), flags); + free_temp(rp, temp); break; case OPCODE_SLT: @@ -1505,7 +1812,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* - Init structures * - Determine what hwregs each input corresponds to */ -static void init_program(struct r300_fragment_program *rp) +static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) { struct r300_pfs_compile_state *cs = NULL; struct gl_fragment_program *mp = &rp->mesa_program; @@ -1515,6 +1822,7 @@ static void init_program(struct r300_fragment_program *rp) int i,j; /* New compile, reset tracking data */ + rp->optimization = driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); rp->translated = GL_FALSE; rp->error = GL_FALSE; rp->cs = cs = &(R300_CONTEXT(rp->ctx)->state.pfs_compile); @@ -1527,6 +1835,7 @@ static void init_program(struct r300_fragment_program *rp) rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; + rp->const_sin[0] = -1; _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;i<PFS_MAX_ALU_INST;i++) { @@ -1640,13 +1949,13 @@ static void update_params(struct r300_fragment_program *rp) rp->params_uptodate = GL_TRUE; } -void r300_translate_fragment_shader(struct r300_fragment_program *rp) +void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_program *rp) { struct r300_pfs_compile_state *cs = NULL; if (!rp->translated) { - init_program(rp); + init_program(r300, rp); cs = rp->cs; if (parse_program(rp) == GL_FALSE) { |