From 1680ef869625dc1fe9cf481b180382a34e0738e7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 3 Oct 2008 17:30:59 +0100 Subject: mesa: avoid generating constant vertex attributes in fixedfunc programs Keep track of enabled/active vertex attributes. Keep track of potential vertex program outputs. When generating fragment program, replace references to fragment attributes which are effectively non-varying and non-computed passthrough attributes with references to the new CURRENT_ATTRIB tracked state value. Only downside is slight ugliness in VBO code where we need to validate state twice in succession. --- src/mesa/main/mtypes.h | 2 + src/mesa/main/state.c | 38 ++++++++++++++++- src/mesa/main/state.h | 3 ++ src/mesa/main/texenvprogram.c | 94 ++++++++++++++++++++++++++++++++++++++++--- src/mesa/vbo/vbo_exec_array.c | 41 +++++++++++++++---- src/mesa/vbo/vbo_exec_draw.c | 8 ++++ src/mesa/vbo/vbo_save_draw.c | 4 ++ 7 files changed, 177 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index bc099dabeb..ca1e369a35 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3073,6 +3073,8 @@ struct __GLcontextRec GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ + GLuint varying_vp_inputs; + /** \name Derived state */ /*@{*/ GLbitfield _TriangleCaps; /**< bitwise-or of DD_* flags */ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index eb8dc2a339..e0eb5f81e2 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -465,7 +465,8 @@ _mesa_update_state_locked( GLcontext *ctx ) _mesa_update_tnl_spaces( ctx, new_state ); if (ctx->FragmentProgram._MaintainTexEnvProgram) { - prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); + prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT | + _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); } if (ctx->VertexProgram._MaintainTnlProgram) { 
prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | @@ -504,3 +505,38 @@ _mesa_update_state( GLcontext *ctx ) _mesa_update_state_locked(ctx); _mesa_unlock_context_textures(ctx); } + + + + +/* Want to figure out which fragment program inputs are actually + * constant/current values from ctx->Current. These should be + * referenced as a tracked state variable rather than a fragment + * program input, to save the overhead of putting a constant value in + * every submitted vertex, transferring it to hardware, interpolating + * it across the triangle, etc... + * + * When there is a VP bound, just use vp->outputs. But when we're + * generating vp from fixed function state, basically want to + * calculate: + * + * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | + * potential_vp_outputs ) + * + * Where potential_vp_outputs is calculated by looking at enabled + * texgen, etc. + * + * The generated fragment program should then only declare inputs that + * may vary or otherwise differ from the ctx->Current values. + * Otherwise, the fp should track them as state values instead. 
+ */ +void +_mesa_set_varying_vp_inputs( GLcontext *ctx, + unsigned varying_inputs ) +{ + if (ctx->varying_vp_inputs != varying_inputs) { + ctx->varying_vp_inputs = varying_inputs; + ctx->NewState |= _NEW_ARRAY; + //_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs); + } +} diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h index bb7cb8f32a..dc08043a76 100644 --- a/src/mesa/main/state.h +++ b/src/mesa/main/state.h @@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx ); extern void _mesa_update_state_locked( GLcontext *ctx ); +void +_mesa_set_varying_vp_inputs( GLcontext *ctx, + unsigned varying_inputs ); #endif diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index ac49373604..7cd82f98b0 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -189,6 +189,63 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) } } +#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0) +#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0) + +/* Identify all possible varying inputs. The fragment program will + * never reference non-varying inputs, but will track them via state + * constants instead. + * + * This function figures out all the inputs that the fragment program + * has access to. The bitmask is later reduced to just those which + * are actually referenced. 
+ */ +static GLuint get_fp_input_mask( GLcontext *ctx ) +{ + GLuint fp_inputs = 0; + + if (1) { + GLuint varying_inputs = ctx->varying_vp_inputs; + + /* First look at what values may be computed by the generated + * vertex program: + */ + if (ctx->Light.Enabled) { + fp_inputs |= FRAG_BIT_COL0; + + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + fp_inputs |= FRAG_BIT_COL1; + } + + fp_inputs |= (ctx->Texture._TexGenEnabled | + ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0; + + /* Then look at what might be varying as a result of enabled + * arrays, etc: + */ + if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0; + if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) + << FRAG_ATTRIB_TEX0); + + } + else { + /* calculate from vp->outputs */ + GLuint vp_outputs = 0; + + if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0; + if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) + << VERT_RESULT_TEX0) + >> FRAG_ATTRIB_TEX0); + } + + return fp_inputs; +} + + /** * Examine current texture environment state and generate a unique * key to identify it. 
@@ -196,17 +253,21 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) static void make_state_key( GLcontext *ctx, struct state_key *key ) { GLuint i, j; - + GLuint inputs_referenced = FRAG_BIT_COL0; + GLuint inputs_available = get_fp_input_mask( ctx ); + memset(key, 0, sizeof(*key)); for (i=0;i<MAX_TEXTURE_UNITS;i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - if (!texUnit->_ReallyEnabled) + if (!texUnit->_ReallyEnabled) continue; key->unit[i].enabled = 1; key->enabled_units |= (1<<i); key->nr_enabled_units = i+1; + inputs_referenced |= FRAG_BIT_TEX(i); key->unit[i].source_index = translate_tex_src_bit(texUnit->_ReallyEnabled); @@ -234,13 +295,18 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) } } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { key->separate_specular = 1; + inputs_referenced |= FRAG_BIT_COL1; + } if (ctx->Fog.Enabled) { key->fog_enabled = 1; key->fog_mode = translate_fog_mode(ctx->Fog.Mode); + inputs_referenced |= FRAG_BIT_FOGC; /* maybe */ } + + key->inputs_available = (inputs_available & inputs_referenced); } /* Use uregs to represent registers internally, translate to Mesa's @@ -446,11 +512,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p, #define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) +static GLuint frag_to_vert_attrib( GLuint attrib ) +{ + switch (attrib) { + case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0; + case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1; + default: + assert(attrib >= FRAG_ATTRIB_TEX0); + assert(attrib <= FRAG_ATTRIB_TEX7); + return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0; + } +} + static struct ureg register_input( struct texenv_fragment_program *p, GLuint input ) { - p->program->Base.InputsRead |= (1 << input); - return make_ureg(PROGRAM_INPUT, input); + if (p->state->inputs_available & (1<<input)) { + p->program->Base.InputsRead |= (1 << input); + return make_ureg(PROGRAM_INPUT, input); + } 
+ else { + GLuint idx = frag_to_vert_attrib( input ); + return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx ); + } } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 0f9d8da356..3d74f9f431 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx ) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; const struct gl_client_array **inputs = &exec->array.inputs[0]; + GLuint const_inputs = 0; GLuint i; exec->array.program_mode = get_program_mode(ctx); @@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx ) for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < MAT_ATTRIB_MAX; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); } /* Could use just about anything, just to fill in the empty * slots: */ - for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) + for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } break; case VP_NV: @@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[i] = exec->array.generic_array[i]; else if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } /* Could use just about anything, just to fill in the empty * slots: */ - for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; + const_inputs |= 1 
<< i; + } break; case VP_ARB: @@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[0] = exec->array.generic_array[0]; else if (exec->array.legacy_array[0]->Enabled) inputs[0] = exec->array.legacy_array[0]; - else + else { inputs[0] = &vbo->legacy_currval[0]; + const_inputs |= 1 << 0; + } for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < 16; i++) { if (exec->array.generic_array[i]->Enabled) inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; - else + else { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + } break; } + + _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); } static void bind_arrays( GLcontext *ctx ) @@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) bind_arrays( ctx ); + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + prim[0].begin = 1; prim[0].end = 1; prim[0].weak = 0; @@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode, bind_arrays( ctx ); + if (ctx->NewState) + _mesa_update_state( ctx ); + ib.count = count; ib.type = type; ib.obj = ctx->Array.ElementArrayBufferObj; diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index f497e9a5a5..ad60c9b05f 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) GLubyte *data = exec->vtx.buffer_map; const GLuint *map; GLuint attr; + GLuint varying_inputs = 0; /* Install the default (ie Current) attributes first, then overlay * all active ones. @@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr]._MaxElement = count; /* ??? 
*/ data += exec->vtx.attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<NewState) + _mesa_update_state( ctx ); + + ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); exec->vtx.buffer_map = NULL; diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index 4c97acddb9..b015bf2786 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -118,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, GLuint data = node->buffer_offset; const GLuint *map; GLuint attr; + GLuint varying_inputs = 0; /* Install the default (ie Current) attributes first, then overlay * all active ones. @@ -167,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx, assert(arrays[attr].BufferObj->Name); data += node->attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<