summary refs log tree commit diff
path: root/src/mesa/drivers/dri/r300/r300_fragprog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c')
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog.c 1111
1 files changed, 710 insertions, 401 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 32c0128eaa..8e45bd5403 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -33,7 +33,6 @@
/*TODO'S
*
- * - COS/SIN/SCS instructions
* - Depth write, WPOS/FOGC inputs
* - FogOption
* - Verify results of opcodes for accuracy, I've only checked them
@@ -51,18 +50,110 @@
#include "r300_fragprog.h"
#include "r300_reg.h"
+/*
+ * Useful macros and values
+ */
+#define ERROR(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __func__, ##args); \
+ rp->error = GL_TRUE; \
+ } while(0)
+
#define PFS_INVAL 0xFFFFFFFF
#define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs
-static void dump_program(struct r300_fragment_program *rp);
-static void emit_arith(struct r300_fragment_program *rp, int op,
- pfs_reg_t dest, int mask,
- pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
- int flags);
+#define SWIZZLE_XYZ 0
+#define SWIZZLE_XXX 1
+#define SWIZZLE_YYY 2
+#define SWIZZLE_ZZZ 3
+#define SWIZZLE_WWW 4
+#define SWIZZLE_YZX 5
+#define SWIZZLE_ZXY 6
+#define SWIZZLE_WZY 7
+#define SWIZZLE_111 8
+#define SWIZZLE_000 9
+#define SWIZZLE_HHH 10
+
+#define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
+ ((SWIZZLE_##x<<0)| \
+ (SWIZZLE_##y<<3)| \
+ (SWIZZLE_##z<<6)| \
+ (SWIZZLE_##w<<9)), \
+ 0)
+
+#define REG_TYPE_INPUT 0
+#define REG_TYPE_OUTPUT 1
+#define REG_TYPE_TEMP 2
+#define REG_TYPE_CONST 3
+
+#define REG_TYPE_SHIFT 0
+#define REG_INDEX_SHIFT 2
+#define REG_VSWZ_SHIFT 8
+#define REG_SSWZ_SHIFT 13
+#define REG_NEGV_SHIFT 18
+#define REG_NEGS_SHIFT 19
+#define REG_ABS_SHIFT 20
+#define REG_NO_USE_SHIFT 21
+#define REG_VALID_SHIFT 22
+
+#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT)
+#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT)
+#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT)
+#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT)
+#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT)
+#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT)
+#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT)
+#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT)
+#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT)
+
+#define REG(type, index, vswz, sswz, nouse, valid) \
+ (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \
+ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \
+ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \
+ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \
+ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \
+ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
+#define REG_GET_TYPE(reg) \
+ ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
+#define REG_GET_INDEX(reg) \
+ ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
+#define REG_GET_VSWZ(reg) \
+ ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
+#define REG_GET_SSWZ(reg) \
+ ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
+#define REG_GET_NO_USE(reg) \
+ ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
+#define REG_GET_VALID(reg) \
+ ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
+#define REG_SET_TYPE(reg, type) \
+ reg = ((reg & ~REG_TYPE_MASK) | \
+ ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
+#define REG_SET_INDEX(reg, index) \
+ reg = ((reg & ~REG_INDEX_MASK) | \
+ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK))
+#define REG_SET_VSWZ(reg, vswz) \
+ reg = ((reg & ~REG_VSWZ_MASK) | \
+ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK))
+#define REG_SET_SSWZ(reg, sswz) \
+ reg = ((reg & ~REG_SSWZ_MASK) | \
+ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
+#define REG_SET_NO_USE(reg, nouse) \
+ reg = ((reg & ~REG_NO_USE_MASK) | \
+ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK))
+#define REG_SET_VALID(reg, valid) \
+ reg = ((reg & ~REG_VALID_MASK) | \
+ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
+#define REG_ABS(reg) \
+ reg = (reg | REG_ABS_MASK)
+#define REG_NEGV(reg) \
+ reg = (reg | REG_NEGV_MASK)
+#define REG_NEGS(reg) \
+ reg = (reg | REG_NEGS_MASK)
-/***************************************
- * begin: useful data structions for fragment program generation
- ***************************************/
+
+/*
+ * Data structures for fragment program generation
+ */
/* description of r300 native hw instructions */
static const struct {
@@ -86,20 +177,23 @@ static const struct {
{ "CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL },
};
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
- SWIZZLE_##y, \
- SWIZZLE_##z, \
- SWIZZLE_ZERO))
-
-#define SLOT_VECTOR (1<<0)
-#define SLOT_SCALAR (1<<3)
-#define SLOT_BOTH (SLOT_VECTOR|SLOT_SCALAR)
/* vector swizzles r300 can support natively, with a couple of
* cases we handle specially
*
- * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table
- **/
+ * REG_VSWZ/REG_SSWZ is an index into this table
+ */
+#define SLOT_VECTOR (1<<0)
+#define SLOT_SCALAR (1<<3)
+#define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR)
+
+/* mapping from SWIZZLE_* to r300 native values for scalar insns */
+#define SWIZZLE_HALF 6
+
+#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
+ SWIZZLE_##y, \
+ SWIZZLE_##z, \
+ SWIZZLE_ZERO))
static const struct r300_pfs_swizzle {
GLuint hash; /* swizzle value this matches */
GLuint base; /* base value for hw swizzle */
@@ -117,42 +211,30 @@ static const struct r300_pfs_swizzle {
{ MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH },
{ MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
{ MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
- { PFS_INVAL, R300_FPI0_ARGC_HALF, 0, 0},
+ { MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0},
{ PFS_INVAL, 0, 0, 0},
};
-#define SWIZZLE_XYZ 0
-#define SWIZZLE_XXX 1
-#define SWIZZLE_YYY 2
-#define SWIZZLE_ZZZ 3
-#define SWIZZLE_WWW 4
-#define SWIZZLE_YZX 5
-#define SWIZZLE_ZXY 6
-#define SWIZZLE_WZY 7
-#define SWIZZLE_111 8
-#define SWIZZLE_000 9
-#define SWIZZLE_HHH 10
+/* used during matching of non-native swizzles */
#define SWZ_X_MASK (7 << 0)
#define SWZ_Y_MASK (7 << 3)
#define SWZ_Z_MASK (7 << 6)
#define SWZ_W_MASK (7 << 9)
-/* used during matching of non-native swizzles */
static const struct {
- GLuint hash; /* used to mask matching swizzle components */
+ GLuint hash; /* used to mask matching swizzle components */
int mask; /* actual outmask */
int count; /* count of components matched */
} s_mask[] = {
- { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
- { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
- { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
- { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
- { SWZ_X_MASK, 1, 1},
- { SWZ_Y_MASK, 2, 1},
- { SWZ_Z_MASK, 4, 1},
- { PFS_INVAL, PFS_INVAL, PFS_INVAL}
+ { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
+ { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
+ { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
+ { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
+ { SWZ_X_MASK, 1, 1},
+ { SWZ_Y_MASK, 2, 1},
+ { SWZ_Z_MASK, 4, 1},
+ { PFS_INVAL, PFS_INVAL, PFS_INVAL}
};
-/* mapping from SWIZZLE_* to r300 native values for scalar insns */
static const struct {
int base; /* hw value of swizzle */
int stride; /* difference between SRC0/1/2 */
@@ -166,58 +248,51 @@ static const struct {
{ R300_FPI2_ARGA_ONE , 0, 0 },
{ R300_FPI2_ARGA_HALF , 0, 0 }
};
-#define SWIZZLE_HALF 6
/* boiler-plate reg, for convenience */
-static const pfs_reg_t undef = {
- type: REG_TYPE_TEMP,
- index: 0,
- v_swz: SWIZZLE_XYZ,
- s_swz: SWIZZLE_W,
- negate_v: 0,
- negate_s: 0,
- absolute: 0,
- no_use: GL_FALSE,
- valid: GL_FALSE
-};
+static const GLuint undef = REG(REG_TYPE_TEMP,
+ 0,
+ SWIZZLE_XYZ,
+ SWIZZLE_W,
+ GL_FALSE,
+ GL_FALSE);
/* constant one source */
-static const pfs_reg_t pfs_one = {
- type: REG_TYPE_CONST,
- index: 0,
- v_swz: SWIZZLE_111,
- s_swz: SWIZZLE_ONE,
- valid: GL_TRUE
-};
+static const GLuint pfs_one = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_111,
+ SWIZZLE_ONE,
+ GL_FALSE,
+ GL_TRUE);
/* constant half source */
-static const pfs_reg_t pfs_half = {
- type: REG_TYPE_CONST,
- index: 0,
- v_swz: SWIZZLE_HHH,
- s_swz: SWIZZLE_HALF,
- valid: GL_TRUE
-};
+static const GLuint pfs_half = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_HHH,
+ SWIZZLE_HALF,
+ GL_FALSE,
+ GL_TRUE);
/* constant zero source */
-static const pfs_reg_t pfs_zero = {
- type: REG_TYPE_CONST,
- index: 0,
- v_swz: SWIZZLE_000,
- s_swz: SWIZZLE_ZERO,
- valid: GL_TRUE
-};
-
-/***************************************
- * end: data structures
- ***************************************/
+static const GLuint pfs_zero = REG(REG_TYPE_CONST,
+ 0,
+ SWIZZLE_000,
+ SWIZZLE_ZERO,
+ GL_FALSE,
+ GL_TRUE);
-#define ERROR(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n",\
- __FILE__, __func__, ##args); \
- rp->error = GL_TRUE; \
-} while(0)
+/*
+ * Common function prototypes
+ */
+static void dump_program(struct r300_fragment_program *rp);
+static void emit_arith(struct r300_fragment_program *rp, int op,
+ GLuint dest, int mask,
+ GLuint src0, GLuint src1, GLuint src2,
+ int flags);
+/*
+ * Helper functions
+ */
static int get_hw_temp(struct r300_fragment_program *rp)
{
COMPILE_STATE;
@@ -256,263 +331,360 @@ static void free_hw_temp(struct r300_fragment_program *rp, int idx)
cs->hwreg_in_use &= ~(1<<idx);
}
-static pfs_reg_t get_temp_reg(struct r300_fragment_program *rp)
+static GLuint get_temp_reg(struct r300_fragment_program *rp)
{
COMPILE_STATE;
- pfs_reg_t r = undef;
+ GLuint r = undef;
+ GLuint index;
- r.index = ffs(~cs->temp_in_use);
- if (!r.index) {
+ index = ffs(~cs->temp_in_use);
+ if (!index) {
ERROR("Out of program temps\n");
return r;
}
- cs->temp_in_use |= (1 << --r.index);
-
- cs->temps[r.index].refcount = 0xFFFFFFFF;
- cs->temps[r.index].reg = -1;
- r.valid = GL_TRUE;
+
+ cs->temp_in_use |= (1 << --index);
+ cs->temps[index].refcount = 0xFFFFFFFF;
+ cs->temps[index].reg = -1;
+
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
return r;
}
-static pfs_reg_t get_temp_reg_tex(struct r300_fragment_program *rp)
+static GLuint get_temp_reg_tex(struct r300_fragment_program *rp)
{
COMPILE_STATE;
- pfs_reg_t r = undef;
+ GLuint r = undef;
+ GLuint index;
- r.index = ffs(~cs->temp_in_use);
- if (!r.index) {
+ index = ffs(~cs->temp_in_use);
+ if (!index) {
ERROR("Out of program temps\n");
return r;
}
- cs->temp_in_use |= (1 << --r.index);
-
- cs->temps[r.index].refcount = 0xFFFFFFFF;
- cs->temps[r.index].reg = get_hw_temp_tex(rp);
- r.valid = GL_TRUE;
+
+ cs->temp_in_use |= (1 << --index);
+ cs->temps[index].refcount = 0xFFFFFFFF;
+ cs->temps[index].reg = get_hw_temp_tex(rp);
+
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
return r;
}
-static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r)
+static void free_temp(struct r300_fragment_program *rp, GLuint r)
{
COMPILE_STATE;
- if (!(cs->temp_in_use & (1<<r.index))) return;
+ GLuint index = REG_GET_INDEX(r);
+
+ if (!(cs->temp_in_use & (1 << index)))
+ return;
- if (r.type == REG_TYPE_TEMP) {
- free_hw_temp(rp, cs->temps[r.index].reg);
- cs->temps[r.index].reg = -1;
- cs->temp_in_use &= ~(1<<r.index);
- } else if (r.type == REG_TYPE_INPUT) {
- free_hw_temp(rp, cs->inputs[r.index].reg);
- cs->inputs[r.index].reg = -1;
+ if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
+ free_hw_temp(rp, cs->temps[index].reg);
+ cs->temps[index].reg = -1;
+ cs->temp_in_use &= ~(1 << index);
+ } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
+ free_hw_temp(rp, cs->inputs[index].reg);
+ cs->inputs[index].reg = -1;
}
}
-static pfs_reg_t emit_param4fv(struct r300_fragment_program *rp,
- GLfloat *values)
+static GLuint emit_param4fv(struct r300_fragment_program *rp,
+ GLfloat *values)
{
- pfs_reg_t r = undef;
- r.type = REG_TYPE_CONST;
+ GLuint r = undef;
+ GLuint index;
int pidx;
pidx = rp->param_nr++;
- r.index = rp->const_nr++;
- if (pidx >= PFS_NUM_CONST_REGS || r.index >= PFS_NUM_CONST_REGS) {
+ index = rp->const_nr++;
+ if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) {
ERROR("Out of const/param slots!\n");
return r;
}
-
- rp->param[pidx].idx = r.index;
+
+ rp->param[pidx].idx = index;
rp->param[pidx].values = values;
rp->params_uptodate = GL_FALSE;
- r.valid = GL_TRUE;
+ REG_SET_TYPE(r, REG_TYPE_CONST);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
return r;
}
-static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
+static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
{
- pfs_reg_t r = undef;
- r.type = REG_TYPE_CONST;
+ GLuint r = undef;
+ GLuint index;
- r.index = rp->const_nr++;
- if (r.index >= PFS_NUM_CONST_REGS) {
+ index = rp->const_nr++;
+ if (index >= PFS_NUM_CONST_REGS) {
ERROR("Out of hw constants!\n");
return r;
}
- COPY_4V(rp->constant[r.index], cp);
- r.valid = GL_TRUE;
+ COPY_4V(rp->constant[index], cp);
+
+ REG_SET_TYPE(r, REG_TYPE_CONST);
+ REG_SET_INDEX(r, index);
+ REG_SET_VALID(r, GL_TRUE);
return r;
}
-static __inline pfs_reg_t negate(pfs_reg_t r)
+static inline GLuint negate(GLuint r)
{
- r.negate_v = 1;
- r.negate_s = 1;
+ REG_NEGS(r);
+ REG_NEGV(r);
return r;
}
/* Hack, to prevent clobbering sources used multiple times when
* emulating non-native instructions
*/
-static __inline pfs_reg_t keep(pfs_reg_t r)
+static inline GLuint keep(GLuint r)
{
- r.no_use = GL_TRUE;
+ REG_SET_NO_USE(r, GL_TRUE);
return r;
}
-static __inline pfs_reg_t absolute(pfs_reg_t r)
+static inline GLuint absolute(GLuint r)
{
- r.absolute = 1;
+ REG_ABS(r);
return r;
}
static int swz_native(struct r300_fragment_program *rp,
- pfs_reg_t src, pfs_reg_t *r, GLuint arbneg)
+ GLuint src,
+ GLuint *r,
+ GLuint arbneg)
{
- /* Native swizzle, nothing to see here */
- src.negate_s = (arbneg >> 3) & 1;
+ /* Native swizzle, handle negation */
+ src = (src & ~REG_NEGS_MASK) |
+ (((arbneg >> 3) & 1) << REG_NEGS_SHIFT);
if ((arbneg & 0x7) == 0x0) {
- src.negate_v = 0;
+ src = src & ~REG_NEGV_MASK;
*r = src;
} else if ((arbneg & 0x7) == 0x7) {
- src.negate_v = 1;
+ src |= REG_NEGV_MASK;
*r = src;
} else {
- if (!r->valid)
+ if (!REG_GET_VALID(*r))
*r = get_temp_reg(rp);
- src.negate_v = 1;
- emit_arith(rp, PFS_OP_MAD, *r, arbneg & 0x7,
- keep(src), pfs_one, pfs_zero, 0);
- src.negate_v = 0;
- emit_arith(rp, PFS_OP_MAD, *r,
+ src |= REG_NEGV_MASK;
+ emit_arith(rp,
+ PFS_OP_MAD,
+ *r,
+ arbneg & 0x7,
+ keep(src),
+ pfs_one,
+ pfs_zero,
+ 0);
+ src = src & ~REG_NEGV_MASK;
+ emit_arith(rp,
+ PFS_OP_MAD,
+ *r,
(arbneg ^ 0x7) | WRITEMASK_W,
- src, pfs_one, pfs_zero, 0);
+ src,
+ pfs_one,
+ pfs_zero,
+ 0);
}
return 3;
}
-static int swz_emit_partial(struct r300_fragment_program *rp, pfs_reg_t src,
- pfs_reg_t *r, int mask, int mc, GLuint arbneg)
+static int swz_emit_partial(struct r300_fragment_program *rp,
+ GLuint src,
+ GLuint *r,
+ int mask,
+ int mc,
+ GLuint arbneg)
{
GLuint tmp;
GLuint wmask = 0;
- if (!r->valid)
+ if (!REG_GET_VALID(*r))
*r = get_temp_reg(rp);
- /* A partial match, src.v_swz/mask define what parts of the
- * desired swizzle we match */
+ /* A partial match, VSWZ/mask define what parts of the
+ * desired swizzle we match
+ */
if (mc + s_mask[mask].count == 3) {
wmask = WRITEMASK_W;
- src.negate_s = (arbneg >> 3) & 1;
+ src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
}
tmp = arbneg & s_mask[mask].mask;
if (tmp) {
tmp = tmp ^ s_mask[mask].mask;
if (tmp) {
- src.negate_v = 1;
- emit_arith(rp, PFS_OP_MAD, *r,
+ emit_arith(rp,
+ PFS_OP_MAD,
+ *r,
arbneg & s_mask[mask].mask,
- keep(src), pfs_one, pfs_zero, 0);
- src.negate_v = 0;
- if (!wmask) src.no_use = GL_TRUE;
- else src.no_use = GL_FALSE;
- emit_arith(rp, PFS_OP_MAD, *r, tmp | wmask,
- src, pfs_one, pfs_zero, 0);
+ keep(src) | REG_NEGV_MASK,
+ pfs_one,
+ pfs_zero,
+ 0);
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(rp,
+ PFS_OP_MAD,
+ *r,
+ tmp | wmask,
+ src,
+ pfs_one,
+ pfs_zero,
+ 0);
} else {
- src.negate_v = 1;
- if (!wmask) src.no_use = GL_TRUE;
- else src.no_use = GL_FALSE;
- emit_arith(rp, PFS_OP_MAD, *r,
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(rp,
+ PFS_OP_MAD,
+ *r,
(arbneg & s_mask[mask].mask) | wmask,
- src, pfs_one, pfs_zero, 0);
- src.negate_v = 0;
+ src | REG_NEGV_MASK,
+ pfs_one,
+ pfs_zero,
+ 0);
}
} else {
- if (!wmask) src.no_use = GL_TRUE;
- else src.no_use = GL_FALSE;
- emit_arith(rp, PFS_OP_MAD, *r,
+ if (!wmask) {
+ REG_SET_NO_USE(src, GL_TRUE);
+ } else {
+ REG_SET_NO_USE(src, GL_FALSE);
+ }
+ emit_arith(rp, PFS_OP_MAD,
+ *r,
s_mask[mask].mask | wmask,
- src, pfs_one, pfs_zero, 0);
+ src,
+ pfs_one,
+ pfs_zero,
+ 0);
}
return s_mask[mask].count;
}
-#define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
- ((SWIZZLE_##x<<0)| \
- (SWIZZLE_##y<<3)| \
- (SWIZZLE_##z<<6)| \
- (SWIZZLE_##w<<9)), \
- 0)
-
-static pfs_reg_t do_swizzle(struct r300_fragment_program *rp,
- pfs_reg_t src, GLuint arbswz, GLuint arbneg)
+static GLuint do_swizzle(struct r300_fragment_program *rp,
+ GLuint src,
+ GLuint arbswz,
+ GLuint arbneg)
{
- pfs_reg_t r = undef;
-
+ GLuint r = undef;
+ GLuint vswz;
int c_mask = 0;
- int v_matched = 0;
+ int v_match = 0;
/* If swizzling from something without an XYZW native swizzle,
* emit result to a temp, and do new swizzle from the temp.
*/
- if (src.v_swz != SWIZZLE_XYZ || src.s_swz != SWIZZLE_W) {
- pfs_reg_t temp = get_temp_reg(rp);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW, src, pfs_one,
- pfs_zero, 0);
+#if 0
+ if (REG_GET_VSWZ(src) != SWIZZLE_XYZ ||
+ REG_GET_SSWZ(src) != SWIZZLE_W) {
+ GLuint temp = get_temp_reg(rp);
+ emit_arith(rp,
+ PFS_OP_MAD,
+ temp,
+ WRITEMASK_XYZW,
+ src,
+ pfs_one,
+ pfs_zero,
+ 0);
src = temp;
}
- src.s_swz = GET_SWZ(arbswz, 3);
+#endif
+
+ if (REG_GET_VSWZ(src) != SWIZZLE_XYZ ||
+ REG_GET_SSWZ(src) != SWIZZLE_W) {
+ GLuint vsrcswz = (v_swiz[REG_GET_VSWZ(src)].hash & (SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK)) | REG_GET_SSWZ(src) << 9;
+ GLint i;
+
+ GLuint newswz = 0;
+ GLuint offset;
+ for(i=0; i < 4; ++i){
+ offset = GET_SWZ(arbswz, i);
+
+ newswz |= (offset <= 3)?GET_SWZ(vsrcswz, offset) << i*3:offset << i*3;
+ }
+
+ arbswz = newswz & (SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK);
+ REG_SET_SSWZ(src, GET_SWZ(newswz, 3));
+ }
+ else
+ {
+ /* set scalar swizzling */
+ REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));
+ }
do {
+ vswz = REG_GET_VSWZ(src);
do {
-#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash)
- if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) {
- if (s_mask[c_mask].count == 3)
- v_matched += swz_native(rp, src, &r,
+ int chash;
+
+ REG_SET_VSWZ(src, vswz);
+ chash = v_swiz[REG_GET_VSWZ(src)].hash &
+ s_mask[c_mask].hash;
+
+ if (chash == (arbswz & s_mask[c_mask].hash)) {
+ if (s_mask[c_mask].count == 3) {
+ v_match += swz_native(rp,
+ src,
+ &r,
arbneg);
- else
- v_matched += swz_emit_partial(rp, src,
- &r,
- c_mask,
- v_matched,
- arbneg);
-
- if (v_matched == 3)
+ } else {
+ v_match += swz_emit_partial(rp,
+ src,
+ &r,
+ c_mask,
+ v_match,
+ arbneg);
+ }
+
+ if (v_match == 3)
return r;
/* Fill with something invalid.. all 0's was
* wrong before, matched SWIZZLE_X. So all
- * 1's will be okay for now */
+ * 1's will be okay for now
+ */
arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
}
- } while(v_swiz[++src.v_swz].hash != PFS_INVAL);
- src.v_swz = SWIZZLE_XYZ;
+ } while(v_swiz[++vswz].hash != PFS_INVAL);
+ REG_SET_VSWZ(src, SWIZZLE_XYZ);
} while (s_mask[++c_mask].hash != PFS_INVAL);
ERROR("should NEVER get here\n");
return r;
}
-
-static pfs_reg_t t_src(struct r300_fragment_program *rp,
- struct prog_src_register fpsrc)
+
+static GLuint t_src(struct r300_fragment_program *rp,
+ struct prog_src_register fpsrc)
{
- pfs_reg_t r = undef;
+ GLuint r = undef;
switch (fpsrc.File) {
case PROGRAM_TEMPORARY:
- r.index = fpsrc.Index;
- r.valid = GL_TRUE;
+ REG_SET_INDEX(r, fpsrc.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
break;
case PROGRAM_INPUT:
- r.index = fpsrc.Index;
- r.type = REG_TYPE_INPUT;
- r.valid = GL_TRUE;
+ REG_SET_INDEX(r, fpsrc.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_INPUT);
break;
case PROGRAM_LOCAL_PARAM:
r = emit_param4fv(rp,
@@ -533,13 +705,13 @@ static pfs_reg_t t_src(struct r300_fragment_program *rp,
}
/* no point swizzling ONE/ZERO/HALF constants... */
- if (r.v_swz < SWIZZLE_111 || r.s_swz < SWIZZLE_ZERO)
+ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase);
return r;
}
-static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp,
- struct prog_src_register fpsrc)
+static GLuint t_scalar_src(struct r300_fragment_program *rp,
+ struct prog_src_register fpsrc)
{
struct prog_src_register src = fpsrc;
int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */
@@ -549,22 +721,24 @@ static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp,
return t_src(rp, src);
}
-static pfs_reg_t t_dst(struct r300_fragment_program *rp,
- struct prog_dst_register dest) {
- pfs_reg_t r = undef;
+static GLuint t_dst(struct r300_fragment_program *rp,
+ struct prog_dst_register dest)
+{
+ GLuint r = undef;
switch (dest.File) {
case PROGRAM_TEMPORARY:
- r.index = dest.Index;
- r.valid = GL_TRUE;
+ REG_SET_INDEX(r, dest.Index);
+ REG_SET_VALID(r, GL_TRUE);
+ REG_SET_TYPE(r, REG_TYPE_TEMP);
return r;
case PROGRAM_OUTPUT:
- r.type = REG_TYPE_OUTPUT;
+ REG_SET_TYPE(r, REG_TYPE_OUTPUT);
switch (dest.Index) {
case FRAG_RESULT_COLR:
case FRAG_RESULT_DEPR:
- r.index = dest.Index;
- r.valid = GL_TRUE;
+ REG_SET_INDEX(r, dest.Index);
+ REG_SET_VALID(r, GL_TRUE);
return r;
default:
ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
@@ -576,66 +750,77 @@ static pfs_reg_t t_dst(struct r300_fragment_program *rp,
}
}
-static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src,
+static int t_hw_src(struct r300_fragment_program *rp,
+ GLuint src,
GLboolean tex)
{
COMPILE_STATE;
int idx;
+ int index = REG_GET_INDEX(src);
- switch (src.type) {
+ switch(REG_GET_TYPE(src)) {
case REG_TYPE_TEMP:
/* NOTE: if reg==-1 here, a source is being read that
- * hasn't been written to. Undefined results */
- if (cs->temps[src.index].reg == -1)
- cs->temps[src.index].reg = get_hw_temp(rp);
- idx = cs->temps[src.index].reg;
+ * hasn't been written to. Undefined results
+ */
+ if (cs->temps[index].reg == -1)
+ cs->temps[index].reg = get_hw_temp(rp);
+
+ idx = cs->temps[index].reg;
- if (!src.no_use && (--cs->temps[src.index].refcount == 0))
+ if (!REG_GET_NO_USE(src) &&
+ (--cs->temps[index].refcount == 0))
free_temp(rp, src);
break;
case REG_TYPE_INPUT:
- idx = cs->inputs[src.index].reg;
+ idx = cs->inputs[index].reg;
- if (!src.no_use && (--cs->inputs[src.index].refcount == 0))
- free_hw_temp(rp, cs->inputs[src.index].reg);
+ if (!REG_GET_NO_USE(src) &&
+ (--cs->inputs[index].refcount == 0))
+ free_hw_temp(rp, cs->inputs[index].reg);
break;
case REG_TYPE_CONST:
- return (src.index | SRC_CONST);
+ return (index | SRC_CONST);
default:
ERROR("Invalid type for source reg\n");
return (0 | SRC_CONST);
}
- if (!tex) cs->used_in_node |= (1 << idx);
+ if (!tex)
+ cs->used_in_node |= (1 << idx);
return idx;
}
-static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest,
+static int t_hw_dst(struct r300_fragment_program *rp,
+ GLuint dest,
GLboolean tex)
{
COMPILE_STATE;
int idx;
- assert(dest.valid);
+ GLuint index = REG_GET_INDEX(dest);
+ assert(REG_GET_VALID(dest));
- switch (dest.type) {
+ switch(REG_GET_TYPE(dest)) {
case REG_TYPE_TEMP:
- if (cs->temps[dest.index].reg == -1) {
- if (!tex)
- cs->temps[dest.index].reg = get_hw_temp(rp);
- else
- cs->temps[dest.index].reg = get_hw_temp_tex(rp);
+ if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
+ if (!tex) {
+ cs->temps[index].reg = get_hw_temp(rp);
+ } else {
+ cs->temps[index].reg = get_hw_temp_tex(rp);
+ }
}
- idx = cs->temps[dest.index].reg;
+ idx = cs->temps[index].reg;
- if (!dest.no_use && (--cs->temps[dest.index].refcount == 0))
+ if (!REG_GET_NO_USE(dest) &&
+ (--cs->temps[index].refcount == 0))
free_temp(rp, dest);
cs->dest_in_node |= (1 << idx);
cs->used_in_node |= (1 << idx);
break;
case REG_TYPE_OUTPUT:
- switch (dest.index) {
+ switch(index) {
case FRAG_RESULT_COLR:
rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_COLOR;
break;
@@ -643,17 +828,18 @@ static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest,
rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_DEPTH;
break;
}
- return dest.index;
+ return index;
break;
default:
- ERROR("invalid dest reg type %d\n", dest.type);
+ ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
return 0;
}
return idx;
}
-static void emit_nop(struct r300_fragment_program *rp, GLuint mask,
+static void emit_nop(struct r300_fragment_program *rp,
+ GLuint mask,
GLboolean sync)
{
COMPILE_STATE;
@@ -679,8 +865,8 @@ static void emit_tex(struct r300_fragment_program *rp,
int opcode)
{
COMPILE_STATE;
- pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]);
- pfs_reg_t dest = undef, rdest = undef;
+ GLuint coord = t_src(rp, fpi->SrcReg[0]);
+ GLuint dest = undef, rdest = undef;
GLuint din = cs->dest_in_node, uin = cs->used_in_node;
int unit = fpi->TexSrcUnit;
int hwsrc, hwdest;
@@ -691,7 +877,7 @@ static void emit_tex(struct r300_fragment_program *rp,
dest = t_dst(rp, fpi->DstReg);
/* r300 doesn't seem to be able to do TEX->output reg */
- if (dest.type == REG_TYPE_OUTPUT) {
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
rdest = dest;
dest = get_temp_reg_tex(rp);
}
@@ -703,7 +889,7 @@ static void emit_tex(struct r300_fragment_program *rp,
if (uin & (1 << hwdest)) {
free_hw_temp(rp, hwdest);
hwdest = get_hw_temp_tex(rp);
- cs->temps[dest.index].reg = hwdest;
+ cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
}
} else {
hwdest = 0;
@@ -713,8 +899,8 @@ static void emit_tex(struct r300_fragment_program *rp,
/* Indirection if source has been written in this node, or if the
* dest has been read/written in this node
*/
- if ((coord.type != REG_TYPE_CONST && (din & (1<<hwsrc))) ||
- (uin & (1<<hwdest))) {
+ if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
+ (din & (1<<hwsrc))) || (uin & (1<<hwdest))) {
/* Finish off current node */
cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
@@ -754,13 +940,13 @@ static void emit_tex(struct r300_fragment_program *rp,
| (opcode << R300_FPITX_OPCODE_SHIFT);
cs->dest_in_node |= (1 << hwdest);
- if (coord.type != REG_TYPE_CONST)
+ if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
cs->used_in_node |= (1 << hwsrc);
rp->node[rp->cur_node].tex_end++;
/* Copy from temp to output if needed */
- if (rdest.valid) {
+ if (REG_GET_VALID(rdest)) {
emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
pfs_one, pfs_zero, 0);
free_temp(rp, dest);
@@ -770,7 +956,9 @@ static void emit_tex(struct r300_fragment_program *rp,
/* Add sources to FPI1/FPI3 lists. If source is already on list,
* reuse the index instead of wasting a source.
*/
-static int add_src(struct r300_fragment_program *rp, int reg, int pos,
+static int add_src(struct r300_fragment_program *rp,
+ int reg,
+ int pos,
int srcmask)
{
COMPILE_STATE;
@@ -819,9 +1007,12 @@ static int add_src(struct r300_fragment_program *rp, int reg, int pos,
* It's not necessary to force the first case, but it makes disassembled
* shaders easier to read.
*/
-static GLboolean force_same_slot(int vop, int sop,
- GLboolean emit_vop, GLboolean emit_sop,
- int argc, pfs_reg_t *src)
+static GLboolean force_same_slot(int vop,
+ int sop,
+ GLboolean emit_vop,
+ GLboolean emit_sop,
+ int argc,
+ GLuint *src)
{
int i;
@@ -833,20 +1024,24 @@ static GLboolean force_same_slot(int vop, int sop,
if (emit_vop) {
for (i=0;i<argc;i++)
- if (src[i].v_swz == SWIZZLE_WZY)
+ if (REG_GET_VSWZ(src[i]) == SWIZZLE_WZY)
return GL_TRUE;
}
return GL_FALSE;
}
-static void emit_arith(struct r300_fragment_program *rp, int op,
- pfs_reg_t dest, int mask,
- pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
+static void emit_arith(struct r300_fragment_program *rp,
+ int op,
+ GLuint dest,
+ int mask,
+ GLuint src0,
+ GLuint src1,
+ GLuint src2,
int flags)
{
COMPILE_STATE;
- pfs_reg_t src[3] = { src0, src1, src2 };
+ GLuint src[3] = { src0, src1, src2 };
int hwsrc[3], sswz[3], vswz[3];
int hwdest;
GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE;
@@ -863,7 +1058,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
emit_sop = GL_TRUE;
- if (dest.type == REG_TYPE_OUTPUT && dest.index == FRAG_RESULT_DEPR)
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
+ REG_GET_INDEX(dest) == FRAG_RESULT_DEPR)
emit_vop = GL_FALSE;
if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) {
@@ -879,12 +1075,12 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
*/
for (i=0;i<3;i++) {
if (emit_vop &&
- (v_swiz[src[i].v_swz].flags & SLOT_SCALAR)) {
+ (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) {
vpos = spos = MAX2(vpos, spos);
break;
}
if (emit_sop &&
- (s_swiz[src[i].s_swz].flags & SLOT_VECTOR)) {
+ (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
vpos = spos = MAX2(vpos, spos);
break;
}
@@ -908,20 +1104,22 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) {
srcpos = add_src(rp, hwsrc[i], vpos,
- v_swiz[src[i].v_swz].flags);
- vswz[i] = (v_swiz[src[i].v_swz].base +
- (srcpos * v_swiz[src[i].v_swz].stride)) |
- (src[i].negate_v ? ARG_NEG : 0) |
- (src[i].absolute ? ARG_ABS : 0);
+ v_swiz[REG_GET_VSWZ(src[i])].flags);
+ vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
+ (srcpos *
+ v_swiz[REG_GET_VSWZ(src[i])].stride)) |
+ ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
+ ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
} else vswz[i] = R300_FPI0_ARGC_ZERO;
if (emit_sop) {
srcpos = add_src(rp, hwsrc[i], spos,
- s_swiz[src[i].s_swz].flags);
- sswz[i] = (s_swiz[src[i].s_swz].base +
- (srcpos * s_swiz[src[i].s_swz].stride)) |
- (src[i].negate_s ? ARG_NEG : 0) |
- (src[i].absolute ? ARG_ABS : 0);
+ s_swiz[REG_GET_SSWZ(src[i])].flags);
+ sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
+ (srcpos *
+ s_swiz[REG_GET_SSWZ(src[i])].stride)) |
+ ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) |
+ ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
} else sswz[i] = R300_FPI2_ARGA_ZERO;
}
hwdest = t_hw_dst(rp, dest, GL_FALSE);
@@ -943,8 +1141,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
(vswz[2] << R300_FPI0_ARG2C_SHIFT);
rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
- if (dest.type == REG_TYPE_OUTPUT) {
- if (dest.index == FRAG_RESULT_COLR) {
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
rp->alu.inst[vpos].inst1 |=
(mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
} else assert(0);
@@ -968,11 +1166,11 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
sswz[2] << R300_FPI2_ARG2A_SHIFT;
if (mask & WRITEMASK_W) {
- if (dest.type == REG_TYPE_OUTPUT) {
- if (dest.index == FRAG_RESULT_COLR) {
+ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+ if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
rp->alu.inst[spos].inst3 |=
(hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT;
- } else if (dest.index == FRAG_RESULT_DEPR) {
+ } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH;
} else assert(0);
} else {
@@ -985,33 +1183,52 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
rp->alu.inst[vpos].inst2 = NOP_INST2;
return;
-};
+}
#if 0
-static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr)
+static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr)
{ /* Compiled out (#if 0). Builds a register value of type
 * REG_TYPE_INPUT whose index is `attr`; when the program does not
 * read that attribute it flags an error via ERROR() and returns
 * `undef` instead. */
struct gl_fragment_program *mp = &rp->mesa_program;
- pfs_reg_t r = undef;
+ GLuint r = undef;
if (!(mp->Base.InputsRead & (1<<attr))) { // attr's bit is not set in the InputsRead bitmask
ERROR("Attribute %d was not provided!\n", attr);
return undef;
}
- r.type = REG_TYPE_INPUT;
- r.index = attr;
- r.valid = GL_TRUE;
+ REG_SET_TYPE(r, REG_TYPE_INPUT);
+ REG_SET_INDEX(r, attr);
+ REG_SET_VALID(r, GL_TRUE); // mark the packed register value as valid
return r;
}
#endif
+static void make_sin_const(struct r300_fragment_program *rp)
+{ /* Emit, at most once per compile, the two constant vectors used by
+ * the parabola-based COS/SCS/SIN expansions in parse_program() and
+ * record the emit_const4fv() results in rp->const_sin[0..1]. */
+ if(rp->const_sin[0] == -1){ // -1 is the "not emitted yet" marker set by init_program()
+ GLfloat cnstv[4];
+
+ cnstv[0] = 1.273239545; // 4/PI: coefficient of the linear term of the parabola
+ cnstv[1] =-0.405284735; // -4/(PI*PI): coefficient of the x*abs(x) term
+ cnstv[2] = 3.141592654; // PI
+ cnstv[3] = 0.2225; // weight applied to the correction term in the final MAD
+ rp->const_sin[0] = emit_const4fv(rp, cnstv);
+
+ cnstv[0] = 0.5; // SCS multiplies PI by this to form x + PI/2
+ cnstv[1] = -1.5; // SCS multiplies PI by this to form x - 3*PI/2
+ cnstv[2] = 0.159154943; // 1/(2*PI): scales x before FRC in the range reduction
+ cnstv[3] = 6.283185307; // 2*PI: scales the fraction back to radians
+ rp->const_sin[1] = emit_const4fv(rp, cnstv);
+ }
+}
+
static GLboolean parse_program(struct r300_fragment_program *rp)
{
struct gl_fragment_program *mp = &rp->mesa_program;
const struct prog_instruction *inst = mp->Base.Instructions;
struct prog_instruction *fpi;
- pfs_reg_t src[3], dest, temp;
- pfs_reg_t cnst;
+ GLuint src[3], dest, temp;
+ GLuint cnst;
int flags, mask = 0;
GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0};
@@ -1058,62 +1275,71 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
break;
case OPCODE_COS:
/*
- * cos using taylor serie:
- * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6!
+ * cos using a parabola (see SIN):
+ * cos(x):
+ * x += PI/2
+ * x = (x/(2*PI))+0.5
+ * x = frac(x)
+ * x = (x*2*PI)-PI
+ * result = sin(x)
*/
temp = get_temp_reg(rp);
- cnstv[0] = 0.5;
- cnstv[1] = 0.041666667;
- cnstv[2] = 0.001388889;
- cnstv[4] = 0.0;
- cnst = emit_const4fv(rp, cnstv);
+ make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_XYZ,
- src[0],
- src[0],
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_Y | WRITEMASK_Z,
- temp, temp,
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_Z,
- temp,
- swizzle(temp, X, X, X, W),
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_XYZ,
- temp, cnst,
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- pfs_one,
- pfs_one,
- negate(temp),
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- temp,
- pfs_one,
- swizzle(temp, Y, Y, Y, W),
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- temp,
- pfs_one,
- negate(swizzle(temp, Z, Z, Z, W)),
- flags);
- emit_arith(rp, PFS_OP_MAD, dest, mask,
+ /* add 0.5*PI and do range reduction */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
+ pfs_half,
+ swizzle(keep(src[0]), X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
- pfs_one,
+ swizzle(rp->const_sin[1], Z, Z, Z, Z),
+ pfs_half,
+ 0);
+
+ emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+ swizzle(temp, X, X, X, X),
+ undef,
+ undef,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+ swizzle(temp, X, X, X, X),
+ swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+ negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
+ 0);
+
+ /* SIN */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(temp, Z, Z, Z, Z),
+ rp->const_sin[0],
pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
+
+
+ emit_arith(rp, PFS_OP_MAD, dest, mask,
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
flags);
+
free_temp(rp, temp);
break;
case OPCODE_DP3:
@@ -1167,7 +1393,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
/* result.x = 1.0
* result.w = src1.w */
if (mask & WRITEMASK_XW) {
- src[1].v_swz = SWIZZLE_111; /* Cheat.. */
+ REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat*/
emit_arith(rp, PFS_OP_MAD, dest,
mask & WRITEMASK_XW,
src[1], pfs_one, pfs_zero,
@@ -1222,7 +1448,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
* change the compare to (t.x + 0.5) > 0.5 we may
* save one instruction by doing CMP -t.x
*/
- cnstv[0] = cnstv[1] = cnstv[2] = cnstv[4] = 0.50001;
+ cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001;
src[0] = t_src(rp, fpi->SrcReg[0]);
temp = get_temp_reg(rp);
cnst = emit_const4fv(rp, cnstv);
@@ -1353,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
flags);
break;
case OPCODE_SCS:
- ERROR("SCS not implemented\n");
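+ /*
+ * SCS: dest.x = cos(x), dest.y = sin(x), both evaluated with
+ * the parabola approximation (see SIN).
+ * cos(x) is computed as sin(x + PI/2); instead of the frac()
+ * range reduction used by COS, CMP selects between
+ * x + PI/2 and x - 3*PI/2.
+ * sin(x) is computed on x directly (no range reduction) and
+ * only when the write mask includes Y.
+ */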
+ temp = get_temp_reg(rp);
+ make_sin_const(rp);
+ src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
+
+ /* add 0.5*PI and do range reduction */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y,
+ swizzle(rp->const_sin[0], Z, Z, Z, Z),
+ rp->const_sin[1],
+ swizzle(keep(src[0]), X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W,
+ swizzle(rp->const_sin[0], Z, Z, Z, Z),
+ negate(pfs_half),
+ swizzle(keep(src[0]), X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+ swizzle(temp, X, X, X, X),
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(temp, W, W, W, W),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(temp, Z, Z, Z, Z),
+ rp->const_sin[0],
+ pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
+
+ if(mask & WRITEMASK_Y)
+ {
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(keep(src[0]), X, X, X, X),
+ rp->const_sin[0],
+ pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(keep(src[0]), X, X, X, X)),
+ swizzle(temp, X, X, X, X),
+ 0);
+ }
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+ swizzle(temp, W, W, W, W),
+ absolute(swizzle(temp, W, W, W, W)),
+ negate(swizzle(temp, W, W, W, W)),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X,
+ swizzle(temp, Z, Z, Z, Z),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, W, W, W, W),
+ flags);
+
+ if(mask & WRITEMASK_Y)
+ {
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
+ swizzle(temp, W, W, W, W),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
+ flags);
+ }
+ free_temp(rp, temp);
break;
case OPCODE_SGE:
src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1372,68 +1684,63 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
break;
case OPCODE_SIN:
/*
- * sin using taylor serie:
- * sin(x) = x - x^3/3! + x^5/5! - x^7/7!
+ * using a parabola:
+ * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
+ * extra precision is obtained by weighting against
+ * itself squared.
*/
+
temp = get_temp_reg(rp);
- cnstv[0] = 0.333333333;
- cnstv[1] = 0.008333333;
- cnstv[2] = 0.000198413;
- cnstv[4] = 0.0;
- cnst = emit_const4fv(rp, cnstv);
+ make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_XYZ,
- src[0],
- src[0],
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_Y | WRITEMASK_Z,
- temp, temp,
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_Z,
- temp,
- swizzle(temp, X, X, X, W),
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_XYZ,
- src[0],
- temp,
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_XYZ,
- temp, cnst,
- pfs_zero,
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- src[0],
- pfs_one,
- negate(temp),
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- temp,
- pfs_one,
- swizzle(temp, Y, Y, Y, W),
- flags);
- emit_arith(rp, PFS_OP_MAD, temp,
- WRITEMASK_X,
- temp,
- pfs_one,
- negate(swizzle(temp, Z, Z, Z, W)),
- flags);
- emit_arith(rp, PFS_OP_MAD, dest, mask,
+ /* do range reduction */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(keep(src[0]), X, X, X, X),
+ swizzle(rp->const_sin[1], Z, Z, Z, Z),
+ pfs_half,
+ 0);
+
+ emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
- pfs_one,
+ undef,
+ undef,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+ swizzle(temp, X, X, X, X),
+ swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+ negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
+ 0);
+
+ /* SIN */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(temp, Z, Z, Z, Z),
+ rp->const_sin[0],
pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
+
+
+ emit_arith(rp, PFS_OP_MAD, dest, mask,
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
flags);
+
free_temp(rp, temp);
break;
case OPCODE_SLT:
@@ -1505,7 +1812,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
/* - Init structures
* - Determine what hwregs each input corresponds to
*/
-static void init_program(struct r300_fragment_program *rp)
+static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp)
{
struct r300_pfs_compile_state *cs = NULL;
struct gl_fragment_program *mp = &rp->mesa_program;
@@ -1515,6 +1822,7 @@ static void init_program(struct r300_fragment_program *rp)
int i,j;
/* New compile, reset tracking data */
+ rp->optimization = driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
rp->translated = GL_FALSE;
rp->error = GL_FALSE;
rp->cs = cs = &(R300_CONTEXT(rp->ctx)->state.pfs_compile);
@@ -1527,6 +1835,7 @@ static void init_program(struct r300_fragment_program *rp)
rp->max_temp_idx = 0;
rp->node[0].alu_end = -1;
rp->node[0].tex_end = -1;
+ rp->const_sin[0] = -1;
_mesa_memset(cs, 0, sizeof(*rp->cs));
for (i=0;i<PFS_MAX_ALU_INST;i++) {
@@ -1640,13 +1949,13 @@ static void update_params(struct r300_fragment_program *rp)
rp->params_uptodate = GL_TRUE;
}
-void r300_translate_fragment_shader(struct r300_fragment_program *rp)
+void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_program *rp)
{
struct r300_pfs_compile_state *cs = NULL;
if (!rp->translated) {
- init_program(rp);
+ init_program(r300, rp);
cs = rp->cs;
if (parse_program(rp) == GL_FALSE) {