summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: michal <michal@michal-laptop.(none)> 2007-07-27 12:50:38 +0200
committer: michal <michal@michal-laptop.(none)> 2007-07-27 12:50:38 +0200
commit: 87555946478f58111013554e5af5c8844f944420 (patch)
tree: 2b94bbc390518bd863bd17428c28e897edc4ab15
parent: e0a26b046764ae80748b347395ab1b27de83651e (diff)
Avoid unnecessary input attrib copy by aligning exec_machine attribs.
-rw-r--r-- src/mesa/pipe/softpipe/sp_quad_fs.c 79
1 files changed, 44 insertions, 35 deletions
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c
index 301bd13aa9..d4acf402ce 100644
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -39,10 +39,20 @@
#include "sp_quad.h"
#include "core/tgsi_core.h"
+#if defined __GNUC__
+#define ALIGNED_ATTRIBS 1
+#else
+#define ALIGNED_ATTRIBS 0
+#endif
+
struct exec_machine {
const struct setup_coefficient *coef; /**< will point to quad->coef */
- GLfloat attr[FRAG_ATTRIB_MAX][4][QUAD_SIZE];
+#if ALIGNED_ATTRIBS
+ GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] __attribute__(( aligned( 16 ) ));
+#else
+ GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE];
+#endif
};
@@ -162,7 +172,7 @@ shade_quad( struct quad_stage *qs, struct quad_header *quad )
for (i = 0; i < NUM_CHANNELS; i++)
cinterp(&exec, attr, i);
break;
-
+
case INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
linterp(&exec, attr, i);
@@ -180,56 +190,55 @@ shade_quad( struct quad_stage *qs, struct quad_header *quad )
{
struct tgsi_exec_machine machine;
- struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1];
struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1];
- struct tgsi_exec_vector *ainputs;
struct tgsi_exec_vector *aoutputs;
- GLuint i /*, total*/;
+ GLuint i;
+
+#if !ALIGNED_ATTRIBS
+ struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1];
+ struct tgsi_exec_vector *ainputs;
+#endif
#ifdef DEBUG
memset(&machine, 0, sizeof(machine));
#endif
- ainputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs );
+ /* init machine state */
+ tgsi_exec_machine_init(
+ &machine,
+ softpipe->fs.tokens );
+
+ /* Consts does not require 16 byte alignment. */
+ machine.Consts = softpipe->fs.constants->constant;
+
aoutputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs );
+ machine.Outputs = aoutputs;
+
+ assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) );
-#if 0
- for( i = total = 0; i < PIPE_ATTRIB_MAX; i++ ) {
- GLuint attr;
-
- attr = softpipe->fp_attr_to_slot[i];
- if( attr || total == 0) {
- assert( total < FRAG_ATTRIB_MAX );
- assert( attr < FRAG_ATTRIB_MAX );
- assert( sizeof( ainputs[0] ) == sizeof( exec.attr[0] ) );
-
- memcpy(
- &ainputs[total],
- exec.attr[attr],
- sizeof( ainputs[0] ) );
- total++;
- }
+#if ALIGNED_ATTRIBS
+ machine.Inputs = (struct tgsi_exec_vector *) exec.attr;
+
+ for (i = 0; i < softpipe->nr_attrs; i++) {
+ /* Make sure fp_attr_to_slot[] is an identity transform. */
+ assert( softpipe->fp_attr_to_slot[i] == i );
}
#else
+ ainputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs );
+ machine.Inputs = ainputs;
+
/* load input registers */
- /* XXX simpler than above, but might not be right... */
for (i = 0; i < softpipe->nr_attrs; i++) {
+ /* Make sure fp_attr_to_slot[] is an identity transform. */
+ assert( softpipe->fp_attr_to_slot[i] == i );
+
memcpy(
- &ainputs[i],
- exec.attr[i],
- sizeof( ainputs[0] ) );
+ &ainputs[i],
+ exec.attr[i],
+ sizeof( ainputs[0] ) );
}
#endif
- /* init machine state */
- tgsi_exec_machine_init(
- &machine,
- softpipe->fs.tokens );
-
- machine.Inputs = ainputs;
- machine.Outputs = aoutputs;
- machine.Consts = softpipe->fs.constants->constant; /* XXX alignment? */
-
/* run shader */
tgsi_exec_machine_run( &machine );