summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c365
1 files changed, 365 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
new file mode 100644
index 0000000000..41d175a22f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct dataflow_state {
+ struct radeon_compiler * C;
+ unsigned int DCE:1;
+ unsigned int UpdateRunning:1;
+
+ struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Address;
+
+ struct rc_dataflow_vector ** UpdateStack;
+ unsigned int UpdateStackSize;
+ unsigned int UpdateStackReserved;
+};
+
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask);
+
+static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s,
+ rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_INPUT || file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+ if (index >= RC_REGISTER_MAX_INDEX)
+ return 0; /* cannot happen, but be defensive */
+
+ if (file == RC_FILE_TEMPORARY)
+ return s->Temporary[index];
+ if (file == RC_FILE_INPUT)
+ return s->Input[index];
+ if (file == RC_FILE_OUTPUT)
+ return s->Output[index];
+ }
+
+ if (file == RC_FILE_ADDRESS)
+ return s->Address;
+
+ return 0; /* can happen, constant register file */
+}
+
+static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask)
+{
+ if (!(mask & ~ref->UseMask))
+ return;
+
+ ref->UseMask |= mask;
+ mark_vector_use(s, ref->Vector, ref->UseMask);
+}
+
+static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst,
+ unsigned int src, unsigned int srcmask)
+{
+ unsigned int refmask = 0;
+
+ for(unsigned int i = 0; i < 4; ++i) {
+ if (GET_BIT(srcmask, i))
+ refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, i);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ return; /* can happen if the swizzle contains constant components */
+
+ if (inst->Dataflow.SrcReg[src])
+ mark_ref_use(s, inst->Dataflow.SrcReg[src], refmask);
+
+ if (inst->Dataflow.SrcRegAddress[src])
+ mark_ref_use(s, inst->Dataflow.SrcRegAddress[src], RC_MASK_X);
+}
+
+static void compute_sources_for_writemask(
+ struct rc_instruction * inst,
+ unsigned int writemask,
+ unsigned int *srcmasks)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+
+ srcmasks[0] = 0;
+ srcmasks[1] = 0;
+ srcmasks[2] = 0;
+
+ if (inst->I.Opcode == RC_OPCODE_KIL)
+ srcmasks[0] |= RC_MASK_XYZW;
+
+ if (!writemask)
+ return;
+
+ if (opcode->IsComponentwise) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= writemask;
+ } else if (opcode->IsStandardScalar) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= RC_MASK_X;
+ } else {
+ switch(inst->I.Opcode) {
+ case RC_OPCODE_ARL:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_OPCODE_DP3:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ break;
+ case RC_OPCODE_DP4:
+ srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_TEX:
+ case RC_OPCODE_TXB:
+ case RC_OPCODE_TXP:
+ srcmasks[0] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_DST:
+ srcmasks[0] |= 0x6;
+ srcmasks[1] |= 0xa;
+ break;
+ case RC_OPCODE_EXP:
+ case RC_OPCODE_LOG:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_OPCODE_LIT:
+ srcmasks[0] |= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void mark_instruction_source_use(struct dataflow_state * s,
+ struct rc_instruction * inst, unsigned int writemask)
+{
+ unsigned int srcmasks[3];
+
+ compute_sources_for_writemask(inst, writemask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src)
+ mark_source_use(s, inst, src, srcmasks[src]);
+}
+
+static void run_update(struct dataflow_state * s)
+{
+ s->UpdateRunning = 1;
+
+ while(s->UpdateStackSize) {
+ struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize];
+ vector->PassBit = 0;
+
+ if (vector->WriteInstruction) {
+ struct rc_instruction * inst = vector->WriteInstruction;
+
+ if (inst->Dataflow.DstRegPrev) {
+ unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask;
+
+ if (carryover)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+
+ mark_instruction_source_use(
+ s, vector->WriteInstruction,
+ vector->UseMask & inst->I.DstReg.WriteMask);
+ }
+ }
+
+ s->UpdateRunning = 0;
+}
+
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask)
+{
+ if (!(mask & ~vector->UseMask))
+ return; /* no new used bits */
+
+ vector->UseMask |= mask;
+ if (vector->PassBit)
+ return;
+
+ if (s->UpdateStackSize >= s->UpdateStackReserved) {
+ unsigned int new_reserve = 2 * s->UpdateStackReserved;
+ struct rc_dataflow_vector ** new_stack;
+
+ if (!new_reserve)
+ new_reserve = 16;
+
+ new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *));
+ memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *));
+
+ s->UpdateStack = new_stack;
+ s->UpdateStackReserved = new_reserve;
+ }
+
+ s->UpdateStack[s->UpdateStackSize++] = vector;
+ vector->PassBit = 1;
+
+ if (!s->UpdateRunning)
+ run_update(s);
+}
+
+static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index);
+ if (vector) {
+ inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst);
+ }
+ if (inst->I.SrcReg[src].RelAddr) {
+ struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0);
+ if (addr)
+ inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst);
+ }
+ }
+
+ mark_instruction_source_use(s, inst, 0); /* for KIL */
+
+ if (opcode->HasDstReg) {
+ struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index);
+ struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst);
+
+ newvec->ValidMask = inst->I.DstReg.WriteMask;
+
+ if (oldvec) {
+ unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask;
+
+ if (oldvec->ValidMask)
+ inst->Dataflow.DstRegAliased = 1;
+
+ if (carryover) {
+ inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst);
+ newvec->ValidMask |= carryover;
+
+ if (!s->DCE)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+ }
+
+ inst->Dataflow.DstReg = newvec;
+
+ if (newvec->File == RC_FILE_TEMPORARY)
+ s->Temporary[newvec->Index] = newvec;
+ else if (newvec->File == RC_FILE_OUTPUT)
+ s->Output[newvec->Index] = newvec;
+ else
+ s->Address = newvec;
+
+ if (!s->DCE)
+ mark_vector_use(s, newvec, inst->I.DstReg.WriteMask);
+ }
+}
+
+static void init_inputs(struct dataflow_state * s)
+{
+ unsigned int index;
+
+ for(index = 0; index < 32; ++index) {
+ if (s->C->Program.InputsRead & (1 << index)) {
+ s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0);
+ s->Input[index]->ValidMask = RC_MASK_XYZW;
+ }
+ }
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+ struct dataflow_state * s = data;
+ struct rc_dataflow_vector * vec = s->Output[index];
+
+ if (vec)
+ mark_vector_use(s, vec, mask);
+}
+
+void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct dataflow_state s;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.DCE = dce ? 1 : 0;
+
+ init_inputs(&s);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ annotate_instruction(&s, inst);
+ }
+
+ if (s.DCE) {
+ dce(userdata, &s, &mark_output_use);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+
+ if (opcode->HasDstReg) {
+ unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask;
+
+ inst->I.DstReg.WriteMask &= ~redundant_writes;
+
+ if (!inst->I.DstReg.WriteMask) {
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+ }
+
+ unsigned int srcmasks[3];
+ compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+
+ if (inst->Dataflow.SrcReg[src]) {
+ if (!inst->Dataflow.SrcReg[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]);
+ inst->Dataflow.SrcReg[src] = 0;
+ }
+ }
+
+ if (inst->Dataflow.SrcRegAddress[src]) {
+ if (!inst->Dataflow.SrcRegAddress[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]);
+ inst->Dataflow.SrcRegAddress[src] = 0;
+ }
+ }
+ }
+ }
+
+ rc_calculate_inputs_outputs(c);
+ }
+}