diff options
Diffstat (limited to 'src/gallium/auxiliary')
90 files changed, 6612 insertions, 4131 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 36c882acb7..80bd0c91db 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -50,20 +50,35 @@ struct cso_context { struct { void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + + void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned nr_vertex_samplers; } hw; void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned nr_vertex_samplers; + unsigned nr_samplers_saved; void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_vertex_samplers_saved; + void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; uint nr_textures; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; + uint nr_vertex_textures; + uint nr_textures_saved; struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_vertex_textures_saved; + struct pipe_texture *vertex_textures_saved[PIPE_MAX_SAMPLERS]; + /** Current and saved state. * The saved state is used as a 1-deep stack. 
*/ @@ -244,7 +259,9 @@ void cso_release_all( struct cso_context *ctx ) if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); - ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL ); + if (ctx->pipe->bind_vertex_sampler_states) + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL); ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); @@ -255,6 +272,11 @@ void cso_release_all( struct cso_context *ctx ) pipe_texture_reference(&ctx->textures_saved[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + pipe_texture_reference(&ctx->vertex_textures_saved[i], NULL); + } + free_framebuffer_state(&ctx->fb); free_framebuffer_state(&ctx->fb_saved); @@ -268,7 +290,7 @@ void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - //cso_release_all( ctx ); + /*cso_release_all( ctx );*/ FREE( ctx ); } } @@ -378,6 +400,46 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, return PIPE_OK; } +enum pipe_error +cso_single_vertex_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = 
ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->vertex_samplers[idx] = handle; + return PIPE_OK; +} + void cso_single_sampler_done( struct cso_context *ctx ) { unsigned i; @@ -398,7 +460,36 @@ void cso_single_sampler_done( struct cso_context *ctx ) memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); ctx->hw.nr_samplers = ctx->nr_samplers; - ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void +cso_single_vertex_sampler_done(struct cso_context *ctx) +{ + unsigned i; + + /* find highest non-null sampler */ + for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) { + if (ctx->vertex_samplers[i - 1] != NULL) + break; + } + + ctx->nr_vertex_samplers = i; + + if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers || + memcmp(ctx->hw.vertex_samplers, + ctx->vertex_samplers, + ctx->nr_vertex_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.vertex_samplers, + ctx->vertex_samplers, + ctx->nr_vertex_samplers * sizeof(void *)); + ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers; + + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, + ctx->nr_vertex_samplers, + ctx->vertex_samplers); } } @@ -447,6 +538,21 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +void +cso_save_vertex_samplers(struct cso_context *ctx) +{ + ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers; + memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers)); +} + +void +cso_restore_vertex_samplers(struct cso_context *ctx) +{ + ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved; + memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, 
sizeof(ctx->vertex_samplers)); + cso_single_vertex_sampler_done(ctx); +} + enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, uint count, @@ -461,7 +567,7 @@ enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, for ( ; i < PIPE_MAX_SAMPLERS; i++) pipe_texture_reference(&ctx->textures[i], NULL); - ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); + ctx->pipe->set_fragment_sampler_textures(ctx->pipe, count, textures); return PIPE_OK; } @@ -491,13 +597,71 @@ void cso_restore_sampler_textures( struct cso_context *ctx ) for ( ; i < PIPE_MAX_SAMPLERS; i++) pipe_texture_reference(&ctx->textures[i], NULL); - ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); + ctx->pipe->set_fragment_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); ctx->nr_textures_saved = 0; } +enum pipe_error +cso_set_vertex_sampler_textures(struct cso_context *ctx, + uint count, + struct pipe_texture **textures) +{ + uint i; + + ctx->nr_vertex_textures = count; + + for (i = 0; i < count; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], textures[i]); + } + for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + } + + ctx->pipe->set_vertex_sampler_textures(ctx->pipe, count, textures); + + return PIPE_OK; +} + +void +cso_save_vertex_sampler_textures(struct cso_context *ctx) +{ + uint i; + + ctx->nr_vertex_textures_saved = ctx->nr_vertex_textures; + for (i = 0; i < ctx->nr_vertex_textures; i++) { + assert(!ctx->vertex_textures_saved[i]); + pipe_texture_reference(&ctx->vertex_textures_saved[i], ctx->vertex_textures[i]); + } +} + +void +cso_restore_vertex_sampler_textures(struct cso_context *ctx) +{ + uint i; + + ctx->nr_vertex_textures = ctx->nr_vertex_textures_saved; + + for (i = 0; i < ctx->nr_vertex_textures; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + ctx->vertex_textures[i] = ctx->vertex_textures_saved[i]; + ctx->vertex_textures_saved[i] = NULL; 
+ } + for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->vertex_textures[i], NULL); + } + + ctx->pipe->set_vertex_sampler_textures(ctx->pipe, + ctx->nr_vertex_textures, + ctx->vertex_textures); + + ctx->nr_vertex_textures_saved = 0; +} + + + enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index b04e98bfa1..e5b92177cf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -31,7 +31,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #ifdef __cplusplus @@ -84,6 +84,20 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +void +cso_save_vertex_samplers(struct cso_context *cso); + +void +cso_restore_vertex_samplers(struct cso_context *cso); + +enum pipe_error +cso_single_vertex_sampler(struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states); + +void +cso_single_vertex_sampler_done(struct cso_context *cso); + enum pipe_error cso_set_sampler_textures( struct cso_context *cso, @@ -94,6 +108,17 @@ void cso_restore_sampler_textures( struct cso_context *cso ); +enum pipe_error +cso_set_vertex_sampler_textures(struct cso_context *cso, + uint count, + struct pipe_texture **textures); +void +cso_save_vertex_sampler_textures(struct cso_context *cso); +void +cso_restore_vertex_sampler_textures(struct cso_context *cso); + + + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't * (eg mesa's internall-generated texenv programs), it will be up to diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 9f956715a2..23d8b609e1 
100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -139,29 +139,29 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -228,30 +228,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... 
*/ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->maxInput + 1; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; + decl.Range.First = + decl.Range.Last = aactx->maxInput + 1; ctx->emit_declaration(ctx, &decl); /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->freeSampler; + decl.Range.First = + decl.Range.Last = aactx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->texTemp; + decl.Range.First = + decl.Range.Last = aactx->texTemp; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; @@ -265,14 +265,15 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = aactx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = 
aactx->maxInput + 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; + newInst.Instruction.Texture = TRUE; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = aactx->maxInput + 1; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = aactx->freeSampler; ctx->emit_instruction(ctx, &newInst); @@ -280,26 +281,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL alpha */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = 
TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->texTemp; ctx->emit_instruction(ctx, &newInst); /* END */ @@ -316,11 +317,11 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } @@ -398,10 +399,9 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! 
*/ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.depth[0] = 1; - pf_get_block(texTemp.format, &texTemp.block); + texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth0 = 1; aaline->texture = screen->texture_create(screen, &texTemp); if (!aaline->texture) @@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline) */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; - const uint size = aaline->texture->width[level]; + const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; - assert(aaline->texture->width[level] == aaline->texture->height[level]); + assert(aaline->texture->width0 == aaline->texture->height0); /* This texture is new, no need to flush. */ @@ -896,16 +896,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_states = pipe->bind_sampler_states; - aaline->driver_set_sampler_textures = pipe->set_sampler_textures; + aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_states = aaline_bind_sampler_states; - pipe->set_sampler_textures = aaline_set_sampler_textures; + pipe->bind_fragment_sampler_states = aaline_bind_sampler_states; + pipe->set_fragment_sampler_textures = aaline_set_sampler_textures; /* Install once everything is known to be OK: */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index ae1712fe12..75130a8fb0 
100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -131,22 +131,22 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -198,23 +198,23 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... 
*/ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = texInput; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; + decl.Range.First = + decl.Range.Last = texInput; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = tmp0; + decl.Range.First = + decl.Range.Last = tmp0; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; @@ -234,30 +234,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + 
newInst.Src[1].Register.Index = texInput; ctx->emit_instruction(ctx, &newInst); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_ADD; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y; ctx->emit_instruction(ctx, &newInst); #if NORMALIZE /* OPTIONAL normalization of length */ @@ -265,24 +265,24 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - 
newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); /* RCP t0.x, t0.x; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); #endif @@ -290,16 +290,16 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SGT; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - 
newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ @@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); @@ -323,77 +323,77 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + 
newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* RCP t0.z, t0.z; # t0.z = 1 / m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - 
newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X; ctx->emit_instruction(ctx, &newInst); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + 
newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* CMP t0.w, -t0.y, tex.w, t0.w; @@ -405,29 +405,29 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_CMP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - 
newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 3; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleX = 
TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[2].Register.Index = tmp0; + newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); } @@ -439,26 +439,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL result.color.w, colorTemp, tmp0.w; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - 
newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->tmp0; ctx->emit_instruction(ctx, &newInst); } else { @@ -468,11 +468,11 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 283502cdf3..bcb860da2e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -133,20 +133,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->DeclarationRange.First; + 
pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -226,25 +226,25 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = wincoordInput; + decl.Semantic.Name = TGSI_SEMANTIC_POSITION; + decl.Semantic.Index = 0; + decl.Range.First = + decl.Range.Last = wincoordInput; ctx->emit_declaration(ctx, &decl); } /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->freeSampler; + decl.Range.First = + decl.Range.Last = pctx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->texTemp; + decl.Range.First = + decl.Range.Last = pctx->texTemp; ctx->emit_declaration(ctx, &decl); /* emit immediate = {1/32, 1/32, 1, 1} @@ -280,27 +280,28 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = 
pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = wincoordInput; + newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE; + newInst.Src[1].Register.Index = pctx->numImmed; ctx->emit_instruction(ctx, &newInst); /* TEX texTemp, texTemp, sampler; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; + newInst.Instruction.Texture = TRUE; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ @@ -308,9 +309,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - 
newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); } @@ -427,10 +428,9 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; - texTemp.width[0] = 32; - texTemp.height[0] = 32; - texTemp.depth[0] = 1; - pf_get_block(texTemp.format, &texTemp.block); + texTemp.width0 = 32; + texTemp.height0 = 32; + texTemp.depth0 = 1; pstip->texture = screen->texture_create(screen, &texTemp); if (pstip->texture == NULL) @@ -754,8 +754,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_states = pipe->bind_sampler_states; - pstip->driver_set_sampler_textures = pipe->set_sampler_textures; + pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -763,8 +763,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_states = pstip_bind_sampler_states; - pipe->set_sampler_textures = pstip_set_sampler_textures; + pipe->bind_fragment_sampler_states = pstip_bind_sampler_states; + pipe->set_fragment_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; return TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index dbb5ac7182..4865a2d854 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ 
b/src/gallium/auxiliary/draw/draw_pt.c @@ -192,7 +192,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) prim, start, count); for (i = 0; i < count; i++) { - uint ii, j; + uint ii = 0; + uint j; if (draw->pt.user.elts) { /* indexed arrays */ diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 44147aed9b..734c05f068 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -212,17 +212,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle, struct draw_vertex_shader *vs = draw->vs.vertex_shader; vs->prepare(vs, draw); } - - - //return TRUE; } - - - - static void fse_run_linear( struct draw_pt_middle_end *middle, unsigned start, unsigned count ) diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 00d7197b13..6c1cb48e8b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -104,7 +104,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, unsigned clipped = 0; unsigned j; - if (0) debug_printf("%s\n"); + if (0) debug_printf("%s\n", __FUNCTION__); for (j = 0; j < count; j++) { float *position = out->data[pos]; @@ -210,7 +210,7 @@ void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, pvs->run = post_vs_viewport; } else { - //if (opengl) + /* if (opengl) */ pvs->run = post_vs_cliptest_viewport_gl; } } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 1a0527be63..757c487454 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend, vcache->fetch_max, draw_count); - if (max_index == 0xffffffff || + if (max_index >= DRAW_PIPE_MAX_VERTICES || + fetch_count >= 
UNDEFINED_VERTEX_ID || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; @@ -394,6 +395,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, default: assert(0); + FREE(storage); return; } } @@ -422,6 +424,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, default: assert(0); + FREE(storage); return; } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 62e04a65f3..1aaae4ab7a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -361,8 +361,8 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, static struct x86_reg get_dst_ptr( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { - unsigned file = dst->DstRegister.File; - unsigned idx = dst->DstRegister.Index; + unsigned file = dst->Register.File; + unsigned idx = dst->Register.Index; unsigned i; @@ -529,27 +529,18 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, const struct tgsi_full_src_register *src ) { struct x86_reg arg0 = aos_get_shader_reg(cp, - src->SrcRegister.File, - src->SrcRegister.Index); + src->Register.File, + src->Register.Index); unsigned i; ubyte swz = 0; unsigned negs = 0; unsigned abs = 0; for (i = 0; i < 4; i++) { - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, i ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i ); - switch (swizzle) { - case TGSI_EXTSWIZZLE_ZERO: - case TGSI_EXTSWIZZLE_ONE: - AOS_ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2"); - break; - - default: - swz |= (swizzle & 0x3) << (i * 2); - break; - } + swz |= (swizzle & 0x3) << (i * 2); switch (neg) { case TGSI_UTIL_SIGN_TOGGLE: @@ -629,26 +620,13 @@ static void x87_fld_src( struct aos_compilation *cp, unsigned channel ) { struct x86_reg arg0 = aos_get_shader_reg_ptr(cp, - src->SrcRegister.File, - 
src->SrcRegister.Index); + src->Register.File, + src->Register.Index); - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel ); - switch (swizzle) { - case TGSI_EXTSWIZZLE_ZERO: - x87_fldz( cp->func ); - break; - - case TGSI_EXTSWIZZLE_ONE: - x87_fld1( cp->func ); - break; - - default: - x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) ); - break; - } - + x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) ); switch (neg) { case TGSI_UTIL_SIGN_TOGGLE: @@ -691,15 +669,15 @@ static void store_dest( struct aos_compilation *cp, { struct x86_reg dst; - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case 0: return; case TGSI_WRITEMASK_XYZW: aos_adopt_xmm_reg(cp, get_xmm_writable(cp, result), - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); return; default: @@ -707,10 +685,10 @@ static void store_dest( struct aos_compilation *cp, } dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, get_xmm(cp, result)); break; @@ -732,14 +710,14 @@ static void store_dest( struct aos_compilation *cp, break; default: - mask_write(cp, dst, result, reg->DstRegister.WriteMask); + mask_write(cp, dst, result, reg->Register.WriteMask); break; } aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -759,7 +737,7 @@ static void store_scalar_dest( struct aos_compilation *cp, const struct tgsi_full_dst_register *reg, struct x86_reg result ) { - unsigned writemask = reg->DstRegister.WriteMask; + unsigned writemask = 
reg->Register.WriteMask; struct x86_reg dst; if (writemask != TGSI_WRITEMASK_X && @@ -776,12 +754,12 @@ static void store_scalar_dest( struct aos_compilation *cp, result = get_xmm(cp, result); dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, result); break; @@ -804,8 +782,8 @@ static void store_scalar_dest( struct aos_compilation *cp, aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -841,7 +819,7 @@ static void x87_fstp_dest4( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { struct x86_reg ptr = get_dst_ptr(cp, dst); - unsigned writemask = dst->DstRegister.WriteMask; + unsigned writemask = dst->Register.WriteMask; x87_fst_or_nop(cp->func, writemask, 0, ptr); x87_fst_or_nop(cp->func, writemask, 1, ptr); @@ -891,7 +869,7 @@ static void x87_emit_ex2( struct aos_compilation *cp ) struct x86_reg st1 = x86_make_reg(file_x87, 1); int stack = cp->func->x87_stack; -// set_fpu_round_neg_inf( cp ); + /* set_fpu_round_neg_inf( cp ); */ x87_fld(cp->func, st0); /* a a */ x87_fprndint( cp->func ); /* int(a) a*/ @@ -978,7 +956,7 @@ static void emit_print( struct aos_compilation *cp, static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); @@ -986,27 +964,27 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, tmp, neg); sse_maxps(cp->func, tmp, arg0); - store_dest(cp, &op->FullDstRegisters[0], tmp); + store_dest(cp, &op->Dst[0], tmp); return TRUE; } static boolean emit_ADD( 
struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_addps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fcos(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -1015,8 +993,8 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1029,14 +1007,14 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = 
get_xmm_writable(cp, arg0); @@ -1050,14 +1028,14 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1073,14 +1051,14 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); @@ -1095,25 +1073,25 @@ static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld1(cp->func); /* 1 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 1 */ 
x87_fyl2x(cp->func); /* log2(a0) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_emit_ex2(cp); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #endif @@ -1121,8 +1099,8 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1131,7 +1109,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1148,8 +1126,8 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_nearest( cp ); @@ -1158,7 +1136,7 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1175,10 +1153,10 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst 
static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); struct x86_reg st0 = x86_make_reg(file_x87, 0); struct x86_reg st1 = x86_make_reg(file_x87, 1); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1188,7 +1166,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<<i)) { - x87_fld_src(cp, &op->FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1212,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; unsigned lit_count = cp->lit_count++; struct x86_reg result, arg0; unsigned i; @@ -1231,10 +1209,10 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); else - result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + result = get_dst_ptr(cp, &op->Dst[0]); - arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + arg0 = fetch_src( cp, &op->Src[0] ); if (arg0.file == file_XMM) { struct x86_reg tmp = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[1])); @@ -1281,7 +1259,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) { store_dest( cp, - &op->FullDstRegisters[0], + &op->Dst[0], get_xmm_writable( cp, result ) ); } @@ -1291,8 +1269,8 @@ static boolean emit_LIT( struct 
aos_compilation *cp, const struct tgsi_full_inst #if 0 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); @@ -1308,13 +1286,13 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu */ x87_fldz(cp->func); /* 1 0 */ #endif - x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ + x87_fld_src(cp, &op->Src[0], 1); /* a1 1 0 */ x87_fcomi(cp->func, st2); /* a1 1 0 */ x87_fcmovb(cp->func, st1); /* a1' 1 0 */ x87_fstp(cp->func, st1); /* a1' 0 */ x87_fstp(cp->func, st1); /* a1' */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */ + x87_fld_src(cp, &op->Src[0], 3); /* a3 a1' */ x87_fxch(cp->func, st1); /* a1' a3 */ @@ -1327,7 +1305,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu /* a0' = max2(a0, 0): */ x87_fldz(cp->func); /* 0 r2 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 0 r2 */ x87_fcomi(cp->func, st1); x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ @@ -1355,58 +1333,58 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_maxps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static 
boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_minps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = get_xmm_writable(cp, arg0); /* potentially nothing to do */ - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); + struct x86_reg arg2 = fetch_src(cp, &op->Src[2]); /* If we can't clobber old contents of arg0, get a temporary & copy * it there, then clobber it... 
@@ -1415,7 +1393,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, arg0, arg1); sse_addps(cp->func, arg0, arg2); - store_dest(cp, &op->FullDstRegisters[0], arg0); + store_dest(cp, &op->Dst[0], arg0); return TRUE; } @@ -1447,13 +1425,13 @@ static float PIPE_CDECL _exp2(float x) static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { #if 0 - x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ + x87_fld_src(cp, &op->Src[1], 0); /* a1.x */ + x87_fld_src(cp, &op->Src[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); #else uint i; @@ -1472,9 +1450,9 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fld_src( cp, &op->Src[1], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1489,7 +1467,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); #endif return TRUE; } @@ -1515,7 +1493,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1530,7 +1508,7 @@ static boolean 
emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); return TRUE; } @@ -1539,7 +1517,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = aos_get_xmm_reg(cp); if (cp->have_sse2) { @@ -1553,7 +1531,7 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst sse_divss(cp->func, dst, arg0); } - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } @@ -1573,14 +1551,14 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { if (0) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); sse_rsqrtss(cp->func, r, arg0); - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); @@ -1600,7 +1578,7 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); aos_release_xmm_reg(cp, tmp.idx); @@ -1611,23 +1589,23 @@ static boolean emit_RSQ( struct 
aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fsin(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -1635,46 +1613,46 @@ static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, 
&op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_subps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); sse2_cvttps2dq(cp->func, tmp0, arg0); sse2_cvtdq2ps(cp->func, tmp0, tmp0); - store_dest(cp, &op->FullDstRegisters[0], tmp0); + store_dest(cp, &op->Dst[0], tmp0); return TRUE; } static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); struct x86_reg tmp1 = aos_get_xmm_reg(cp); @@ -1692,7 +1670,7 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst aos_release_xmm_reg(cp, tmp0.idx); - store_dest(cp, &op->FullDstRegisters[0], tmp1); + store_dest(cp, &op->Dst[0], tmp1); return TRUE; } @@ -1759,14 +1737,14 @@ emit_instruction( struct aos_compilation *cp, return emit_SUB(cp, inst); case TGSI_OPCODE_LRP: -// return emit_LERP(cp, inst); + /*return emit_LERP(cp, inst);*/ return FALSE; case TGSI_OPCODE_FRC: return emit_FRC(cp, inst); case TGSI_OPCODE_CLAMP: -// return emit_CLAMP(cp, inst); + /*return emit_CLAMP(cp, inst);*/ return FALSE; case TGSI_OPCODE_FLR: @@ -1919,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp ) continue; for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { - if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File == + if (parse.FullToken.FullInstruction.Dst[i].Register.File == TGSI_FILE_OUTPUT) { - unsigned idx = 
parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index; + unsigned idx = parse.FullToken.FullInstruction.Dst[i].Register.Index; cp->output_last_write[idx] = this_instruction; } } diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index bf84401e11..5cafe8c3f0 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -94,8 +94,8 @@ translate_declaration(struct gallivm_ir *prog, unsigned first, last, mask; uint interp_method; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; /* Do not touch WPOS.xy */ @@ -149,7 +149,7 @@ translate_declarationir(struct gallivm_ir *, struct tgsi_full_declaration *) { if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->DeclarationRange.First; + int idx = decl->Range.First; storage->addAddress(idx); } } @@ -234,26 +234,26 @@ translate_instruction(llvm::Module *module, inputs[3] = 0; for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; llvm::Value *val = 0; llvm::Value *indIdx = 0; - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); indIdx = storage->extractIndex(indIdx); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->SrcRegister.Index); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->SrcRegister.Index, 
indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->SrcRegister.Index); + if (src->Register.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->Register.Index); + } else if (src->Register.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->Register.Index); } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File); return; } @@ -656,14 +656,14 @@ translate_instruction(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + + if (dst->Register.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask); } else { fprintf(stderr, "ERROR: 
unsupported LLVM destination!"); assert(!"wrong destination"); @@ -683,16 +683,16 @@ translate_instructionir(llvm::Module *module, std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; std::vector<llvm::Value*> val; llvm::Value *indIdx = 0; int swizzle = swizzleInt(src); - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); + val = storage->load((enum tgsi_file_type)src->Register.File, + src->Register.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } @@ -993,9 +993,9 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + storage->store((enum tgsi_file_type)dst->Register.File, + dst->Register.Index, out, dst->Register.WriteMask, instr->getIRBuilder() ); } } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 2590546cb4..eb7e84be84 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -46,7 +46,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -237,8 +237,9 @@ pb_reference(struct pb_buffer **dst, { struct pb_buffer *old = *dst; - if (pipe_reference((struct pipe_reference**)dst, 
&src->base.reference)) + if (pipe_reference(&(*dst)->base.reference, &src->base.reference)) pb_destroy( old ); + *dst = src; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 109ac7c9d6..a9375abd21 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -42,7 +42,7 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" @@ -80,11 +80,27 @@ struct fenced_buffer_list */ struct fenced_buffer { + /* + * Immutable members. + */ + struct pb_buffer base; - struct pb_buffer *buffer; + struct fenced_buffer_list *list; + + /** + * Protected by fenced_buffer_list::mutex + */ + struct list_head head; - /* FIXME: protect access with mutex */ + /** + * Following members are mutable and protected by this mutex. 
+ * + * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex + * held, but in order to prevent deadlocks you must never lock + * fenced_buffer_list::mutex with this mutex held. + */ + pipe_mutex mutex; /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -96,9 +112,6 @@ struct fenced_buffer struct pb_validate *vl; unsigned validation_flags; struct pipe_fence_handle *fence; - - struct list_head head; - struct fenced_buffer_list *list; }; @@ -110,15 +123,24 @@ fenced_buffer(struct pb_buffer *buf) } +/** + * Add the buffer to the fenced list. + * + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order before calling this function. + * + * Reference count should be incremented before calling this function. + */ static INLINE void -_fenced_buffer_add(struct fenced_buffer *fenced_buf) +fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); + /* TODO: Move the reference count increment here */ + #ifdef DEBUG LIST_DEL(&fenced_buf->head); assert(fenced_list->numUnfenced); @@ -130,32 +152,16 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) /** - * Actually destroy the buffer. + * Remove the buffer from the fenced list. + * + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order before calling this function. + * + * Reference count should be decremented after calling this function. 
*/ static INLINE void -_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) -{ - struct fenced_buffer_list *fenced_list = fenced_buf->list; - - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); -#ifdef DEBUG - assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; -#else - (void)fenced_list; -#endif - pb_reference(&fenced_buf->buffer, NULL); - FREE(fenced_buf); -} - - -static INLINE void -_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { struct pb_fence_ops *ops = fenced_list->ops; @@ -177,37 +183,53 @@ _fenced_buffer_remove(struct fenced_buffer_list *fenced_list, ++fenced_list->numUnfenced; #endif - /** - * FIXME!!! - */ - - if(!pipe_is_referenced(&fenced_buf->base.base.reference)) - _fenced_buffer_destroy(fenced_buf); + /* TODO: Move the reference count decrement and destruction here */ } +/** + * Wait for the fence to expire, and remove it from the fenced list. + * + * fenced_buffer::mutex must be held. 
fenced_buffer_list::mutex must not be + * held -- it will be acquired and released by this function. + */ static INLINE enum pipe_error -_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; + enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); + + /* Acquire the global lock */ + pipe_mutex_unlock(fenced_buf->mutex); + pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_buf->mutex); + if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { - return PIPE_ERROR; + if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { + /* Remove from the fenced list */ + /* TODO: remove consequents */ + fenced_buffer_remove_locked(fenced_list, fenced_buf); + + p_atomic_dec(&fenced_buf->base.base.reference.count); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; } - /* Remove from the fenced list */ - /* TODO: remove consequents */ - _fenced_buffer_remove(fenced_list, fenced_buf); } - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return PIPE_OK; + pipe_mutex_unlock(fenced_list->mutex); + + return ret; } @@ -215,12 +237,13 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) * Free as many fenced buffers from the list head as possible.
*/ static void -_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, +fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, int wait) { struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; + struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; curr = fenced_list->delayed.next; @@ -228,21 +251,30 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, while(curr != &fenced_list->delayed) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); + if(fenced_buf->fence != prev_fence) { int signaled; if (wait) signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) + if (signaled != 0) { + pipe_mutex_unlock(fenced_buf->mutex); break; + } prev_fence = fenced_buf->fence; } else { assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - _fenced_buffer_remove(fenced_list, fenced_buf); + fenced_buffer_remove_locked(fenced_list, fenced_buf); + pipe_mutex_unlock(fenced_buf->mutex); + + pb_buf = &fenced_buf->base; + pb_reference(&pb_buf, NULL); + curr = next; next = curr->next; @@ -256,30 +288,25 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - pipe_mutex_lock(fenced_list->mutex); assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - if (fenced_buf->fence) { - struct pb_fence_ops *ops = fenced_list->ops; - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { - struct list_head *curr, *prev; - curr = &fenced_buf->head; - prev = curr->prev; - do { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); - _fenced_buffer_remove(fenced_list, fenced_buf); - curr = prev; - prev = curr->prev; - } while (curr != 
&fenced_list->delayed); - } - else { - /* delay destruction */ - } - } - else { - _fenced_buffer_destroy(fenced_buf); - } + assert(!fenced_buf->fence); + +#ifdef DEBUG + pipe_mutex_lock(fenced_list->mutex); + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_list->numUnfenced); + --fenced_list->numUnfenced; pipe_mutex_unlock(fenced_list->mutex); +#else + (void)fenced_list; +#endif + + pb_reference(&fenced_buf->buffer, NULL); + + pipe_mutex_destroy(fenced_buf->mutex); + FREE(fenced_buf); } @@ -290,24 +317,23 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - void *map; + void *map = NULL; + + pipe_mutex_lock(fenced_buf->mutex); assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); /* Serialize writes */ if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { /* Don't wait for the GPU to finish writing */ - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) - _fenced_buffer_remove(fenced_list, fenced_buf); - else - return NULL; - } - else { - /* Wait for the GPU to finish writing */ - _fenced_buffer_finish(fenced_buf); + goto finish; } + + /* Wait for the GPU to finish writing */ + fenced_buffer_finish_locked(fenced_list, fenced_buf); } #if 0 @@ -324,6 +350,9 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } +finish: + pipe_mutex_unlock(fenced_buf->mutex); + return map; } @@ -332,6 +361,9 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + + pipe_mutex_lock(fenced_buf->mutex); + 
assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { pb_unmap(fenced_buf->buffer); @@ -339,6 +371,8 @@ fenced_buffer_unmap(struct pb_buffer *buf) if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } + + pipe_mutex_unlock(fenced_buf->mutex); } @@ -350,11 +384,14 @@ fenced_buffer_validate(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); enum pipe_error ret; + pipe_mutex_lock(fenced_buf->mutex); + if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - return PIPE_OK; + ret = PIPE_OK; + goto finish; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -362,14 +399,17 @@ fenced_buffer_validate(struct pb_buffer *buf, flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) - return PIPE_ERROR_RETRY; + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto finish; + } #if 0 /* Do not validate if buffer is still mapped */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ - return PIPE_ERROR_RETRY; + ret = PIPE_ERROR_RETRY; + goto finish; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -379,17 +419,21 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - return PIPE_OK; + ret = PIPE_OK; + goto finish; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - return ret; + goto finish; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; - return PIPE_OK; +finish: + pipe_mutex_unlock(fenced_buf->mutex); + + return ret; } @@ -404,29 +448,36 @@ fenced_buffer_fence(struct pb_buffer *buf, fenced_buf = fenced_buffer(buf); fenced_list = fenced_buf->list; ops = fenced_list->ops; - - if(fence == 
fenced_buf->fence) { - /* Nothing to do */ - return; - } - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); + pipe_mutex_lock(fenced_buf->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + fenced_buffer_remove_locked(fenced_list, fenced_buf); + p_atomic_dec(&fenced_buf->base.base.reference.count); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + p_atomic_inc(&fenced_buf->base.base.reference.count); + fenced_buffer_add_locked(fenced_list, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); - pb_fence(fenced_buf->buffer, fence); + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + } - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + pipe_mutex_unlock(fenced_buf->mutex); + pipe_mutex_unlock(fenced_list->mutex); } @@ -436,6 +487,7 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + /* NOTE: accesses immutable members only -- mutex not necessary */ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); } @@ -475,6 +527,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, buf->buffer = buffer; buf->list = fenced_list; + pipe_mutex_init(buf->mutex); + #ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); @@ -516,7 +570,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list 
*fenced_list, int wait) { pipe_mutex_lock(fenced_list->mutex); - _fenced_buffer_list_check_free(fenced_list, wait); + fenced_buffer_list_check_free_locked(fenced_list, wait); pipe_mutex_unlock(fenced_list->mutex); } @@ -538,11 +592,13 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) next = curr->next; while(curr != &fenced_list->unfenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); assert(!fenced_buf->fence); debug_printf("%10p %7u %7u\n", - fenced_buf, + (void *) fenced_buf, fenced_buf->base.base.size, - fenced_buf->base.base.reference.count); + p_atomic_read(&fenced_buf->base.base.reference.count)); + pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -552,13 +608,15 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %7u %10p %s\n", - fenced_buf, + (void *) fenced_buf, fenced_buf->base.base.size, - fenced_buf->base.base.reference.count, - fenced_buf->fence, + p_atomic_read(&fenced_buf->base.base.reference.count), + (void *) fenced_buf->fence, signaled == 0 ? 
"y" : "n"); + pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -579,15 +637,16 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif - _fenced_buffer_list_check_free(fenced_list, 1); pipe_mutex_lock(fenced_list->mutex); + fenced_buffer_list_check_free_locked(fenced_list, 1); } #ifdef DEBUG - //assert(!fenced_list->numUnfenced); + /*assert(!fenced_list->numUnfenced);*/ #endif pipe_mutex_unlock(fenced_list->mutex); + pipe_mutex_destroy(fenced_list->mutex); fenced_list->ops->destroy(fenced_list->ops); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 39ab8e722c..8c8d713078 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -51,7 +51,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #ifdef __cplusplus diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 57d1ede45a..7b34c8e357 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -294,7 +294,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, LIST_DEL(&buf->head); pipe_mutex_unlock(mgr->mutex); /* Increase refcount */ - pb_reference((struct pb_buffer**)&buf, &buf->base); + pipe_reference(NULL, &buf->base.base.reference); return &buf->base; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 1b4df28c70..6e3214ca9c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -350,7 +350,7 @@ pb_debug_manager_dump(struct pb_debug_manager *mgr) buf = LIST_ENTRY(struct pb_debug_buffer, curr, head); debug_printf("buffer = %p\n", buf); - 
debug_printf(" .size = %p\n", buf->base.base.size); + debug_printf(" .size = 0x%x\n", buf->base.base.size); debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE); curr = next; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index e7352e90db..d21910d0bf 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -37,7 +37,6 @@ */ #include "pipe/p_compiler.h" -#include "pipe/p_error.h" #include "util/u_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 150fd50618..ce40c0cf0e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -34,7 +34,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index dfb84df1ce..3c93f30f20 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -37,7 +37,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #ifdef __cplusplus extern "C" { diff --git a/src/gallium/auxiliary/rbug/README b/src/gallium/auxiliary/rbug/README index 33d76371de..d984067893 100644 --- a/src/gallium/auxiliary/rbug/README +++ b/src/gallium/auxiliary/rbug/README @@ -16,6 +16,10 @@ for information about applications look in: progs/rbug/README +for a GUI see: + + http://cgit.freedesktop.org/mesa/rbug-gui + -- Jakob Bornecrantz <jakob@vmware.com> diff --git a/src/gallium/auxiliary/rbug/rbug_connection.c b/src/gallium/auxiliary/rbug/rbug_connection.c index 52acb700af..ae4e27f9f6 100644 --- a/src/gallium/auxiliary/rbug/rbug_connection.c +++ 
b/src/gallium/auxiliary/rbug/rbug_connection.c @@ -87,6 +87,7 @@ rbug_get_message(struct rbug_connection *c, uint32_t *serial) if (!data) { return NULL; } + data->opcode = 0; do { uint8_t *ptr = ((uint8_t*)data) + read; diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index eb492076b7..080fd4c731 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -1129,3 +1129,35 @@ TGSI Instruction Specification target Label of target instruction. + +3 Other tokens +=============== + + +3.1 Declaration Semantic +------------------------- + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + + +3.1.1 FACE + + Valid only in a fragment shader INPUT declaration. + + FACE.x is negative when the primitive is back facing. FACE.x is positive + when the primitive is front facing. 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index e0cfc54420..4092f78f4a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -30,21 +30,6 @@ #include "tgsi_build.h" #include "tgsi_parse.h" -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ) -{ - struct tgsi_version version; - - version.MajorVersion = 1; - version.MinorVersion = 1; - version.Padding = 0; - - return version; -} /* * header @@ -122,7 +107,6 @@ tgsi_default_declaration( void ) declaration.Centroid = 0; declaration.Invariant = 0; declaration.Padding = 0; - declaration.Extended = 0; return declaration; } @@ -173,7 +157,7 @@ tgsi_default_full_declaration( void ) struct tgsi_full_declaration full_declaration; full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); return full_declaration; @@ -210,8 +194,8 @@ tgsi_build_full_declaration( size++; *dr = tgsi_build_declaration_range( - full_decl->DeclarationRange.First, - full_decl->DeclarationRange.Last, + full_decl->Range.First, + full_decl->Range.Last, declaration, header ); @@ -224,8 +208,8 @@ tgsi_build_full_declaration( size++; *ds = tgsi_build_declaration_semantic( - full_decl->Semantic.SemanticName, - full_decl->Semantic.SemanticIndex, + full_decl->Semantic.Name, + full_decl->Semantic.Index, declaration, header ); } @@ -270,8 +254,8 @@ tgsi_default_declaration_semantic( void ) { struct tgsi_declaration_semantic ds; - ds.SemanticName = TGSI_SEMANTIC_POSITION; - ds.SemanticIndex = 0; + ds.Name = TGSI_SEMANTIC_POSITION; + ds.Index = 0; ds.Padding = 0; return ds; @@ -290,8 +274,8 @@ tgsi_build_declaration_semantic( assert( semantic_index <= 0xFFFF ); ds = tgsi_default_declaration_semantic(); - ds.SemanticName = semantic_name; - 
ds.SemanticIndex = semantic_index; + ds.Name = semantic_name; + ds.Index = semantic_index; declaration_grow( declaration, header ); @@ -311,7 +295,6 @@ tgsi_default_immediate( void ) immediate.NrTokens = 1; immediate.DataType = TGSI_IMM_FLOAT32; immediate.Padding = 0; - immediate.Extended = 0; return immediate; } @@ -416,24 +399,26 @@ tgsi_default_instruction( void ) struct tgsi_instruction instruction; instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.NrTokens = 1; + instruction.NrTokens = 0; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; + instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; + instruction.Label = 0; + instruction.Texture = 0; instruction.Padding = 0; - instruction.Extended = 0; return instruction; } struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) +tgsi_build_instruction(unsigned opcode, + unsigned saturate, + unsigned predicate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header) { struct tgsi_instruction instruction; @@ -445,6 +430,7 @@ tgsi_build_instruction( instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; + instruction.Predicate = predicate; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -472,18 +458,16 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); - full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); - full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + full_instruction.Predicate = tgsi_default_instruction_predicate(); + full_instruction.Label = tgsi_default_instruction_label(); + full_instruction.Texture = tgsi_default_instruction_texture(); 
for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { - full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + full_instruction.Dst[i] = tgsi_default_full_dst_register(); } for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { - full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + full_instruction.Src[i] = tgsi_default_full_src_register(); } - full_instruction.Flags = 0x0; - return full_instruction; } @@ -504,82 +488,70 @@ tgsi_build_full_instruction( instruction = (struct tgsi_instruction *) &tokens[size]; size++; - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); + *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.Predicate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header); prev_token = (struct tgsi_token *) instruction; - if( tgsi_compare_instruction_ext_nv( - full_inst->InstructionExtNv, - tgsi_default_instruction_ext_nv() ) ) { - struct tgsi_instruction_ext_nv *instruction_ext_nv; + if (full_inst->Instruction.Predicate) { + struct tgsi_instruction_predicate *instruction_predicate; - if( maxsize <= size ) + if (maxsize <= size) { return 0; - instruction_ext_nv = - (struct tgsi_instruction_ext_nv *) &tokens[size]; + } + instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; size++; - *instruction_ext_nv = tgsi_build_instruction_ext_nv( - full_inst->InstructionExtNv.Precision, - full_inst->InstructionExtNv.CondDstIndex, - full_inst->InstructionExtNv.CondFlowIndex, - full_inst->InstructionExtNv.CondMask, - full_inst->InstructionExtNv.CondSwizzleX, - full_inst->InstructionExtNv.CondSwizzleY, - full_inst->InstructionExtNv.CondSwizzleZ, - full_inst->InstructionExtNv.CondSwizzleW, - full_inst->InstructionExtNv.CondDstUpdate, - 
full_inst->InstructionExtNv.CondFlowEnable, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_nv; + *instruction_predicate = + tgsi_build_instruction_predicate(full_inst->Predicate.Index, + full_inst->Predicate.Negate, + full_inst->Predicate.SwizzleX, + full_inst->Predicate.SwizzleY, + full_inst->Predicate.SwizzleZ, + full_inst->Predicate.SwizzleW, + instruction, + header); } - if( tgsi_compare_instruction_ext_label( - full_inst->InstructionExtLabel, - tgsi_default_instruction_ext_label() ) ) { - struct tgsi_instruction_ext_label *instruction_ext_label; + if (full_inst->Instruction.Label) { + struct tgsi_instruction_label *instruction_label; if( maxsize <= size ) return 0; - instruction_ext_label = - (struct tgsi_instruction_ext_label *) &tokens[size]; + instruction_label = + (struct tgsi_instruction_label *) &tokens[size]; size++; - *instruction_ext_label = tgsi_build_instruction_ext_label( - full_inst->InstructionExtLabel.Label, + *instruction_label = tgsi_build_instruction_label( + full_inst->Label.Label, prev_token, instruction, header ); - prev_token = (struct tgsi_token *) instruction_ext_label; + prev_token = (struct tgsi_token *) instruction_label; } - if( tgsi_compare_instruction_ext_texture( - full_inst->InstructionExtTexture, - tgsi_default_instruction_ext_texture() ) ) { - struct tgsi_instruction_ext_texture *instruction_ext_texture; + if (full_inst->Instruction.Texture) { + struct tgsi_instruction_texture *instruction_texture; if( maxsize <= size ) return 0; - instruction_ext_texture = - (struct tgsi_instruction_ext_texture *) &tokens[size]; + instruction_texture = + (struct tgsi_instruction_texture *) &tokens[size]; size++; - *instruction_ext_texture = tgsi_build_instruction_ext_texture( - full_inst->InstructionExtTexture.Texture, + *instruction_texture = tgsi_build_instruction_texture( + full_inst->Texture.Texture, prev_token, instruction, header ); - prev_token = (struct tgsi_token *) 
instruction_ext_texture; + prev_token = (struct tgsi_token *) instruction_texture; } for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { - const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *reg = &full_inst->Dst[i]; struct tgsi_dst_register *dst_register; struct tgsi_token *prev_token; @@ -589,58 +561,15 @@ tgsi_build_full_instruction( size++; *dst_register = tgsi_build_dst_register( - reg->DstRegister.File, - reg->DstRegister.WriteMask, - reg->DstRegister.Indirect, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.WriteMask, + reg->Register.Indirect, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) dst_register; - if( tgsi_compare_dst_register_ext_concode( - reg->DstRegisterExtConcode, - tgsi_default_dst_register_ext_concode() ) ) { - struct tgsi_dst_register_ext_concode *dst_register_ext_concode; - - if( maxsize <= size ) - return 0; - dst_register_ext_concode = - (struct tgsi_dst_register_ext_concode *) &tokens[size]; - size++; - - *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( - reg->DstRegisterExtConcode.CondMask, - reg->DstRegisterExtConcode.CondSwizzleX, - reg->DstRegisterExtConcode.CondSwizzleY, - reg->DstRegisterExtConcode.CondSwizzleZ, - reg->DstRegisterExtConcode.CondSwizzleW, - reg->DstRegisterExtConcode.CondSrcIndex, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_concode; - } - - if( tgsi_compare_dst_register_ext_modulate( - reg->DstRegisterExtModulate, - tgsi_default_dst_register_ext_modulate() ) ) { - struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; - - if( maxsize <= size ) - return 0; - dst_register_ext_modulate = - (struct tgsi_dst_register_ext_modulate *) &tokens[size]; - size++; - - *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( - reg->DstRegisterExtModulate.Modulate, - prev_token, - instruction, - header ); - prev_token = 
(struct tgsi_token *) dst_register_ext_modulate; - } - - if( reg->DstRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -649,22 +578,23 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->DstRegisterInd.File, - reg->DstRegisterInd.SwizzleX, - reg->DstRegisterInd.SwizzleY, - reg->DstRegisterInd.SwizzleZ, - reg->DstRegisterInd.SwizzleW, - reg->DstRegisterInd.Negate, - reg->DstRegisterInd.Indirect, - reg->DstRegisterInd.Dimension, - reg->DstRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { - const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *reg = &full_inst->Src[i]; struct tgsi_src_register *src_register; struct tgsi_token *prev_token; @@ -674,77 +604,21 @@ tgsi_build_full_instruction( size++; *src_register = tgsi_build_src_register( - reg->SrcRegister.File, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW, - reg->SrcRegister.Negate, - reg->SrcRegister.Indirect, - reg->SrcRegister.Dimension, - reg->SrcRegister.Index, + reg->Register.File, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW, + reg->Register.Negate, + reg->Register.Absolute, + reg->Register.Indirect, + reg->Register.Dimension, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) src_register; - if( tgsi_compare_src_register_ext_swz( - reg->SrcRegisterExtSwz, - tgsi_default_src_register_ext_swz() ) ) { - struct tgsi_src_register_ext_swz *src_register_ext_swz; - - /* Use of the extended swizzle requires the 
simple swizzle to be identity. - */ - assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); - assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); - assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); - assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); - assert( reg->SrcRegister.Negate == FALSE ); - - if( maxsize <= size ) - return 0; - src_register_ext_swz = - (struct tgsi_src_register_ext_swz *) &tokens[size]; - size++; - - *src_register_ext_swz = tgsi_build_src_register_ext_swz( - reg->SrcRegisterExtSwz.ExtSwizzleX, - reg->SrcRegisterExtSwz.ExtSwizzleY, - reg->SrcRegisterExtSwz.ExtSwizzleZ, - reg->SrcRegisterExtSwz.ExtSwizzleW, - reg->SrcRegisterExtSwz.NegateX, - reg->SrcRegisterExtSwz.NegateY, - reg->SrcRegisterExtSwz.NegateZ, - reg->SrcRegisterExtSwz.NegateW, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_swz; - } - - if( tgsi_compare_src_register_ext_mod( - reg->SrcRegisterExtMod, - tgsi_default_src_register_ext_mod() ) ) { - struct tgsi_src_register_ext_mod *src_register_ext_mod; - - if( maxsize <= size ) - return 0; - src_register_ext_mod = - (struct tgsi_src_register_ext_mod *) &tokens[size]; - size++; - - *src_register_ext_mod = tgsi_build_src_register_ext_mod( - reg->SrcRegisterExtMod.Complement, - reg->SrcRegisterExtMod.Bias, - reg->SrcRegisterExtMod.Scale2X, - reg->SrcRegisterExtMod.Absolute, - reg->SrcRegisterExtMod.Negate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_mod; - } - - if( reg->SrcRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -753,23 +627,24 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.SwizzleX, - reg->SrcRegisterInd.SwizzleY, - reg->SrcRegisterInd.SwizzleZ, - reg->SrcRegisterInd.SwizzleW, - reg->SrcRegisterInd.Negate, - reg->SrcRegisterInd.Indirect, - reg->SrcRegisterInd.Dimension, - 
reg->SrcRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } - if( reg->SrcRegister.Dimension ) { + if( reg->Register.Dimension ) { struct tgsi_dimension *dim; - assert( !reg->SrcRegisterDim.Dimension ); + assert( !reg->Dimension.Dimension ); if( maxsize <= size ) return 0; @@ -777,12 +652,12 @@ tgsi_build_full_instruction( size++; *dim = tgsi_build_dimension( - reg->SrcRegisterDim.Indirect, - reg->SrcRegisterDim.Index, + reg->Dimension.Indirect, + reg->Dimension.Index, instruction, header ); - if( reg->SrcRegisterDim.Indirect ) { + if( reg->Dimension.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -791,15 +666,16 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterDimInd.File, - reg->SrcRegisterDimInd.SwizzleX, - reg->SrcRegisterDimInd.SwizzleY, - reg->SrcRegisterDimInd.SwizzleZ, - reg->SrcRegisterDimInd.SwizzleW, - reg->SrcRegisterDimInd.Negate, - reg->SrcRegisterDimInd.Indirect, - reg->SrcRegisterDimInd.Dimension, - reg->SrcRegisterDimInd.Index, + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, instruction, header ); } @@ -809,163 +685,103 @@ tgsi_build_full_instruction( return size; } -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ) +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) { - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; - instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; - instruction_ext_nv.CondDstIndex = 
0; - instruction_ext_nv.CondFlowIndex = 0; - instruction_ext_nv.CondMask = TGSI_CC_TR; - instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; - instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; - instruction_ext_nv.CondDstUpdate = 0; - instruction_ext_nv.CondFlowEnable = 0; - instruction_ext_nv.Padding = 0; - instruction_ext_nv.Extended = 0; - - return instruction_ext_nv; -} + struct tgsi_instruction_predicate instruction_predicate; + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; -/** test for inequality of 32-bit values pointed to by a and b */ -static INLINE boolean -compare32(const void *a, const void *b) -{ - return *((uint32_t *) a) != *((uint32_t *) b); + return instruction_predicate; } - -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ) +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) { - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} + struct tgsi_instruction_predicate instruction_predicate; -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_enable, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - 
struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv = tgsi_default_instruction_ext_nv(); - instruction_ext_nv.Precision = precision; - instruction_ext_nv.CondDstIndex = cond_dst_index; - instruction_ext_nv.CondFlowIndex = cond_flow_index; - instruction_ext_nv.CondMask = cond_mask; - instruction_ext_nv.CondSwizzleX = cond_swizzle_x; - instruction_ext_nv.CondSwizzleY = cond_swizzle_y; - instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; - instruction_ext_nv.CondSwizzleW = cond_swizzle_w; - instruction_ext_nv.CondDstUpdate = cond_dst_update; - instruction_ext_nv.CondFlowEnable = cond_flow_enable; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; + + instruction_grow(instruction, header); - return instruction_ext_nv; + return instruction_predicate; } -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ) +struct tgsi_instruction_label +tgsi_default_instruction_label( void ) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_label instruction_label; - instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - instruction_ext_label.Label = 0; - instruction_ext_label.Padding = 0; - instruction_ext_label.Extended = 0; + instruction_label.Label = 0; + instruction_label.Padding = 0; - return instruction_ext_label; + return instruction_label; } -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_instruction_ext_label 
-tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_label instruction_label; - instruction_ext_label = tgsi_default_instruction_ext_label(); - instruction_ext_label.Label = label; + instruction_label = tgsi_default_instruction_label(); + instruction_label.Label = label; + instruction->Label = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_label; + return instruction_label; } -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ) +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_ext_texture.Padding = 0; - instruction_ext_texture.Extended = 0; + instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_texture.Padding = 0; - return instruction_ext_texture; + return instruction_texture; } -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture = tgsi_default_instruction_ext_texture(); - 
instruction_ext_texture.Texture = texture; + instruction_texture = tgsi_default_instruction_texture(); + instruction_texture.Texture = texture; + instruction->Texture = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_texture; + return instruction_texture; } struct tgsi_src_register @@ -979,10 +795,10 @@ tgsi_default_src_register( void ) src_register.SwizzleZ = TGSI_SWIZZLE_Z; src_register.SwizzleW = TGSI_SWIZZLE_W; src_register.Negate = 0; + src_register.Absolute = 0; src_register.Indirect = 0; src_register.Dimension = 0; src_register.Index = 0; - src_register.Extended = 0; return src_register; } @@ -995,6 +811,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -1018,6 +835,7 @@ tgsi_build_src_register( src_register.SwizzleZ = swizzle_z; src_register.SwizzleW = swizzle_w; src_register.Negate = negate; + src_register.Absolute = absolute; src_register.Indirect = indirect; src_register.Dimension = dimension; src_register.Index = index; @@ -1032,145 +850,14 @@ tgsi_default_full_src_register( void ) { struct tgsi_full_src_register full_src_register; - full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); - full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); - full_src_register.SrcRegisterInd = tgsi_default_src_register(); - full_src_register.SrcRegisterDim = tgsi_default_dimension(); - full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + full_src_register.Register = tgsi_default_src_register(); + full_src_register.Indirect = tgsi_default_src_register(); + full_src_register.Dimension = tgsi_default_dimension(); + full_src_register.DimIndirect = tgsi_default_src_register(); return full_src_register; } -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ) -{ - struct 
tgsi_src_register_ext_swz src_register_ext_swz; - - src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; - src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; - src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; - src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; - src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; - src_register_ext_swz.NegateX = 0; - src_register_ext_swz.NegateY = 0; - src_register_ext_swz.NegateZ = 0; - src_register_ext_swz.NegateW = 0; - src_register_ext_swz.Padding = 0; - src_register_ext_swz.Extended = 0; - - return src_register_ext_swz; -} - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); - assert( negate_x <= 1 ); - assert( negate_y <= 1 ); - assert( negate_z <= 1 ); - assert( negate_w <= 1 ); - - src_register_ext_swz = tgsi_default_src_register_ext_swz(); - src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; - src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; - src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; - src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; - src_register_ext_swz.NegateX = negate_x; - src_register_ext_swz.NegateY = negate_y; - src_register_ext_swz.NegateZ = negate_z; - src_register_ext_swz.NegateW = negate_w; - - 
prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_swz; -} - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - src_register_ext_mod.Complement = 0; - src_register_ext_mod.Bias = 0; - src_register_ext_mod.Scale2X = 0; - src_register_ext_mod.Absolute = 0; - src_register_ext_mod.Negate = 0; - src_register_ext_mod.Padding = 0; - src_register_ext_mod.Extended = 0; - - return src_register_ext_mod; -} - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - assert( complement <= 1 ); - assert( bias <= 1 ); - assert( scale_2x <= 1 ); - assert( absolute <= 1 ); - assert( negate <= 1 ); - - src_register_ext_mod = tgsi_default_src_register_ext_mod(); - src_register_ext_mod.Complement = complement; - src_register_ext_mod.Bias = bias; - src_register_ext_mod.Scale2X = scale_2x; - src_register_ext_mod.Absolute = absolute; - src_register_ext_mod.Negate = negate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_mod; -} struct tgsi_dimension tgsi_default_dimension( void ) @@ -1181,7 +868,6 @@ tgsi_default_dimension( void ) dimension.Dimension = 0; dimension.Padding = 0; dimension.Index = 0; - dimension.Extended = 0; return dimension; } @@ -1215,7 +901,6 @@ tgsi_default_dst_register( void ) dst_register.Dimension = 0; dst_register.Index = 0; 
dst_register.Padding = 0; - dst_register.Extended = 0; return dst_register; } @@ -1251,118 +936,9 @@ tgsi_default_full_dst_register( void ) { struct tgsi_full_dst_register full_dst_register; - full_dst_register.DstRegister = tgsi_default_dst_register(); - full_dst_register.DstRegisterInd = tgsi_default_src_register(); - full_dst_register.DstRegisterExtConcode = - tgsi_default_dst_register_ext_concode(); - full_dst_register.DstRegisterExtModulate = - tgsi_default_dst_register_ext_modulate(); + full_dst_register.Register = tgsi_default_dst_register(); + full_dst_register.Indirect = tgsi_default_src_register(); return full_dst_register; } -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; - dst_register_ext_concode.CondMask = TGSI_CC_TR; - dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; - dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; - dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; - dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; - dst_register_ext_concode.CondSrcIndex = 0; - dst_register_ext_concode.Padding = 0; - dst_register_ext_concode.Extended = 0; - - return dst_register_ext_concode; -} - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - assert( cc <= TGSI_CC_FL ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( 
swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( index >= -32768 && index <= 32767 ); - - dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); - dst_register_ext_concode.CondMask = cc; - dst_register_ext_concode.CondSwizzleX = swizzle_x; - dst_register_ext_concode.CondSwizzleY = swizzle_y; - dst_register_ext_concode.CondSwizzleZ = swizzle_z; - dst_register_ext_concode.CondSwizzleW = swizzle_w; - dst_register_ext_concode.CondSrcIndex = index; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_concode; -} - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; - dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; - dst_register_ext_modulate.Padding = 0; - dst_register_ext_modulate.Extended = 0; - - return dst_register_ext_modulate; -} - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - assert( modulate <= TGSI_MODULATE_EIGHTH ); - - dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); - dst_register_ext_modulate.Modulate = modulate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_modulate; -} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 17d977b059..ffea786770 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -36,12 +36,6 @@ struct tgsi_token; extern "C" { #endif -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ); /* * header @@ -143,6 +137,7 @@ struct tgsi_instruction tgsi_build_instruction( unsigned opcode, unsigned saturate, + unsigned predicate, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header ); @@ -157,55 +152,34 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ); +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void); -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ); - -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_enable, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ); +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header); -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ); +struct tgsi_instruction_label +tgsi_default_instruction_label( void ); -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct 
tgsi_header *header ); -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ); - -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ); +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ); -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, @@ -222,6 +196,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -231,46 +206,6 @@ tgsi_build_src_register( struct tgsi_full_src_register tgsi_default_full_src_register( void ); -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ); - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ); - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ); - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ); - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); struct tgsi_dimension tgsi_default_dimension( void ); @@ -297,40 +232,6 @@ tgsi_build_dst_register( struct 
tgsi_full_dst_register tgsi_default_full_dst_register( void ); -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ); - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ); - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ); - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ); - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 111d95b666..d09ab92565 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -100,7 +100,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP" + "LOOP", + "PRED" }; static const char *interpolate_names[] = @@ -148,26 +149,6 @@ static const char *texture_names[] = "SHADOWRECT" }; -static const char *extswizzle_names[] = -{ - "x", - "y", - "z", - "w", - "0", - "1" -}; - -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; static void _dump_register( @@ -243,19 +224,19 @@ iter_declaration( _dump_register( ctx, decl->Declaration.File, - decl->DeclarationRange.First, - decl->DeclarationRange.Last ); + decl->Range.First, + decl->Range.Last ); _dump_writemask( ctx, decl->Declaration.UsageMask ); if 
(decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, semantic_names ); - if (decl->Semantic.SemanticIndex != 0 || - decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + ENM( decl->Semantic.Name, semantic_names ); + if (decl->Semantic.Index != 0 || + decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) { CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); + UID( decl->Semantic.Index ); CHR( ']' ); } } @@ -371,117 +352,84 @@ iter_instruction( } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (!first_reg) CHR( ',' ); CHR( ' ' ); - if (dst->DstRegister.Indirect) { + if (dst->Register.Indirect) { _dump_register_ind( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegisterInd.File, - dst->DstRegisterInd.Index, - dst->DstRegisterInd.SwizzleX ); + dst->Register.File, + dst->Register.Index, + dst->Indirect.File, + dst->Indirect.Index, + dst->Indirect.SwizzleX ); } else { _dump_register( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegister.Index ); + dst->Register.File, + dst->Register.Index, + dst->Register.Index ); } - ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( ctx, dst->DstRegister.WriteMask ); + _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *src = &inst->Src[i]; if (!first_reg) CHR( ',' ); CHR( ' ' ); - if (src->SrcRegisterExtMod.Negate) + if (src->Register.Negate) TXT( "-(" ); - if (src->SrcRegisterExtMod.Absolute) + if (src->Register.Absolute) CHR( '|' ); - if (src->SrcRegisterExtMod.Scale2X) - TXT( "2*(" ); - if (src->SrcRegisterExtMod.Bias) - CHR( '(' ); - if (src->SrcRegisterExtMod.Complement) - TXT( "1-(" ); - if (src->SrcRegister.Negate) - 
CHR( '-' ); - - if (src->SrcRegister.Indirect) { + + if (src->Register.Indirect) { _dump_register_ind( ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegisterInd.File, - src->SrcRegisterInd.Index, - src->SrcRegisterInd.SwizzleX ); + src->Register.File, + src->Register.Index, + src->Indirect.File, + src->Indirect.Index, + src->Indirect.SwizzleX ); } else { _dump_register( ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Index ); + src->Register.File, + src->Register.Index, + src->Register.Index ); } - if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, swizzle_names ); - ENM( src->SrcRegister.SwizzleY, swizzle_names ); - ENM( src->SrcRegister.SwizzleZ, swizzle_names ); - ENM( src->SrcRegister.SwizzleW, swizzle_names ); - } - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + if (src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' 
); - if (src->SrcRegisterExtSwz.NegateX) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateY) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateZ) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateW) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); + ENM( src->Register.SwizzleX, swizzle_names ); + ENM( src->Register.SwizzleY, swizzle_names ); + ENM( src->Register.SwizzleZ, swizzle_names ); + ENM( src->Register.SwizzleW, swizzle_names ); } - if (src->SrcRegisterExtMod.Complement) - CHR( ')' ); - if (src->SrcRegisterExtMod.Bias) - TXT( ")-.5" ); - if (src->SrcRegisterExtMod.Scale2X) - CHR( ')' ); - if (src->SrcRegisterExtMod.Absolute) + if (src->Register.Absolute) CHR( '|' ); - if (src->SrcRegisterExtMod.Negate) + if (src->Register.Negate) CHR( ')' ); first_reg = FALSE; } - if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, texture_names ); + ENM( inst->Texture.Texture, texture_names ); } switch (inst->Instruction.Opcode) { @@ -491,7 +439,7 @@ iter_instruction( case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); - UID( inst->InstructionExtLabel.Label ); + UID( inst->Label.Label ); break; } @@ -529,9 +477,6 @@ prolog( { struct dump_ctx *ctx = (struct dump_ctx *) iter; ENM( iter->processor.Processor, processor_type_names ); - UID( iter->version.MajorVersion ); - CHR( '.' 
); - UID( iter->version.MinorVersion ); EOL(); return TRUE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 4a9c02b141..47fd1dd590 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -79,7 +79,8 @@ static const char *TGSI_FILES[TGSI_FILE_COUNT] = "FILE_SAMPLER", "FILE_ADDRESS", "FILE_IMMEDIATE", - "FILE_LOOP" + "FILE_LOOP", + "FILE_PREDICATE" }; static const char *TGSI_INTERPOLATES[] = @@ -112,34 +113,6 @@ static const char *TGSI_SATS[] = "SAT_MINUS_PLUS_ONE" }; -static const char *TGSI_INSTRUCTION_EXTS[] = -{ - "INSTRUCTION_EXT_TYPE_NV", - "INSTRUCTION_EXT_TYPE_LABEL", - "INSTRUCTION_EXT_TYPE_TEXTURE" -}; - -static const char *TGSI_PRECISIONS[] = -{ - "PRECISION_DEFAULT", - "PRECISION_FLOAT32", - "PRECISION_FLOAT16", - "PRECISION_FIXED12" -}; - -static const char *TGSI_CCS[] = -{ - "CC_GT", - "CC_EQ", - "CC_LT", - "CC_UN", - "CC_GE", - "CC_LE", - "CC_NE", - "CC_TR", - "CC_FL" -}; - static const char *TGSI_SWIZZLES[] = { "SWIZZLE_X", @@ -161,22 +134,6 @@ static const char *TGSI_TEXTURES[] = "TEXTURE_SHADOWRECT" }; -static const char *TGSI_SRC_REGISTER_EXTS[] = -{ - "SRC_REGISTER_EXT_TYPE_SWZ", - "SRC_REGISTER_EXT_TYPE_MOD" -}; - -static const char *TGSI_EXTSWIZZLES[] = -{ - "EXTSWIZZLE_X", - "EXTSWIZZLE_Y", - "EXTSWIZZLE_Z", - "EXTSWIZZLE_W", - "EXTSWIZZLE_ZERO", - "EXTSWIZZLE_ONE" -}; - static const char *TGSI_WRITEMASKS[] = { "0", @@ -197,23 +154,6 @@ static const char *TGSI_WRITEMASKS[] = "WRITEMASK_XYZW" }; -static const char *TGSI_DST_REGISTER_EXTS[] = -{ - "DST_REGISTER_EXT_TYPE_CONDCODE", - "DST_REGISTER_EXT_TYPE_MODULATE" -}; - -static const char *TGSI_MODULATES[] = -{ - "MODULATE_1X", - "MODULATE_2X", - "MODULATE_4X", - "MODULATE_8X", - "MODULATE_HALF", - "MODULATE_QUARTER", - "MODULATE_EIGHTH" -}; - static void dump_declaration_verbose( struct tgsi_full_declaration *decl, @@ -246,6 +186,14 @@ dump_declaration_verbose( TXT( "\nSemantic : " ); UID( 
decl->Declaration.Semantic ); } + if (deflt || fd->Declaration.Centroid != decl->Declaration.Centroid) { + TXT("\nCentroid : "); + UID(decl->Declaration.Centroid); + } + if (deflt || fd->Declaration.Invariant != decl->Declaration.Invariant) { + TXT("\nInvariant : "); + UID(decl->Declaration.Invariant); + } if( ignored ) { TXT( "\nPadding : " ); UIX( decl->Declaration.Padding ); @@ -253,16 +201,16 @@ dump_declaration_verbose( EOL(); TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); + UID( decl->Range.First ); TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); + UID( decl->Range.Last ); if( decl->Declaration.Semantic ) { EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); + TXT( "\nName : " ); + ENM( decl->Semantic.Name, TGSI_SEMANTICS ); + TXT( "\nIndex: " ); + UID( decl->Semantic.Index ); if( ignored ) { TXT( "\nPadding : " ); UIX( decl->Semantic.Padding ); @@ -322,318 +270,122 @@ dump_instruction_verbose( TXT( "\nNumSrcRegs : " ); UID( inst->Instruction.NumSrcRegs ); } + if (deflt || fi->Instruction.Predicate != inst->Instruction.Predicate) { + TXT("\nPredicate : "); + UID(inst->Instruction.Predicate); + } + if (deflt || fi->Instruction.Label != inst->Instruction.Label) { + TXT("\nLabel : "); + UID(inst->Instruction.Label); + } + if (deflt || fi->Instruction.Texture != inst->Instruction.Texture) { + TXT("\nTexture : "); + UID(inst->Instruction.Texture); + } if( ignored ) { TXT( "\nPadding : " ); UIX( inst->Instruction.Padding ); } - if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { - TXT( "\nPrecision : " ); - ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); - } - if( deflt || fi->InstructionExtNv.CondDstIndex != 
inst->InstructionExtNv.CondDstIndex ) { - TXT( "\nCondDstIndex : " ); - UID( inst->InstructionExtNv.CondDstIndex ); - } - if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { - TXT( "\nCondFlowIndex : " ); - UID( inst->InstructionExtNv.CondFlowIndex ); - } - if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { - TXT( "\nCondMask : " ); - ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { - TXT( "\nCondSwizzleX : " ); - ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { - TXT( "\nCondSwizzleY : " ); - ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ : " ); - ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { - TXT( "\nCondSwizzleW : " ); - ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { - TXT( "\nCondDstUpdate : " ); - UID( inst->InstructionExtNv.CondDstUpdate ); - } - if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { - TXT( "\nCondFlowEnable: " ); - UID( inst->InstructionExtNv.CondFlowEnable ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtNv.Padding ); - if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { - TXT( "\nExtended : " ); - UID( inst->InstructionExtNv.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + if (deflt || inst->Instruction.Label) { EOL(); - TXT( "\nType : " ); - 
ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + if (deflt || fi->Label.Label != inst->Label.Label) { TXT( "\nLabel : " ); - UID( inst->InstructionExtLabel.Label ); + UID(inst->Label.Label); } if( ignored ) { TXT( "\nPadding : " ); - UIX( inst->InstructionExtLabel.Padding ); - if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtLabel.Extended ); - } + UIX(inst->Label.Padding); } } - if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + if (deflt || inst->Instruction.Texture) { EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + if (deflt || fi->Texture.Texture != inst->Texture.Texture) { TXT( "\nTexture : " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + ENM(inst->Texture.Texture, TGSI_TEXTURES); } if( ignored ) { TXT( "\nPadding : " ); - UIX( inst->InstructionExtTexture.Padding ); - if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtTexture.Extended ); - } + UIX(inst->Texture.Padding); } } for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + struct tgsi_full_dst_register *fd = &fi->Dst[i]; EOL(); TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + ENM( dst->Register.File, TGSI_FILES ); + if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) { TXT( "\nWriteMask: " ); - ENM( 
dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + ENM( dst->Register.WriteMask, TGSI_WRITEMASKS ); } if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + if( deflt || fd->Register.Indirect != dst->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); + UID( dst->Register.Indirect ); } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + if( deflt || fd->Register.Dimension != dst->Register.Dimension ) { TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); + UID( dst->Register.Dimension ); } } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + if( deflt || fd->Register.Index != dst->Register.Index ) { TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); + SID( dst->Register.Index ); } if( ignored ) { TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); - } - } - - if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { - TXT( "\nCondMask : " ); - ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { - TXT( "\nCondSwizzleX: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { - TXT( "\nCondSwizzleY: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleZ, 
TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { - TXT( "\nCondSwizzleW: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { - TXT( "\nCondSrcIndex: " ); - UID( dst->DstRegisterExtConcode.CondSrcIndex ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtConcode.Padding ); - if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegisterExtConcode.Extended ); - } - } - } - - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { - TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { - TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); - } - } + UIX( dst->Register.Padding ); } } for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; + struct tgsi_full_src_register *fs = &fi->Src[i]; EOL(); TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + ENM( src->Register.File, TGSI_FILES ); + if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) { TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + ENM( 
src->Register.SwizzleX, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) { TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleY, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) { TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleZ, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) { TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleW, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + if (deflt || fs->Register.Absolute != src->Register.Absolute) { + TXT("\nAbsolute : "); + UID(src->Register.Absolute); + } + if( deflt || fs->Register.Negate != src->Register.Negate ) { TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); + UID( src->Register.Negate ); } if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + if( deflt || fs->Register.Indirect != src->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); + UID( src->Register.Indirect ); } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + if( deflt || fs->Register.Dimension != src->Register.Dimension ) { TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); + UID( src->Register.Dimension ); } } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + if( deflt || fs->Register.Index != src->Register.Index ) { TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { - TXT( "\nExtended : " ); - UID( 
src->SrcRegister.Extended ); - } - } - - if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { - TXT( "\nExtSwizzleX: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { - TXT( "\nExtSwizzleY: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { - TXT( "\nExtSwizzleZ: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { - TXT( "\nExtSwizzleW: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { - TXT( "\nNegateX : " ); - UID( src->SrcRegisterExtSwz.NegateX ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { - TXT( "\nNegateY : " ); - UID( src->SrcRegisterExtSwz.NegateY ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { - TXT( "\nNegateZ : " ); - UID( src->SrcRegisterExtSwz.NegateZ ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { - TXT( "\nNegateW : " ); - UID( src->SrcRegisterExtSwz.NegateW ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtSwz.Padding ); - if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtSwz.Extended ); - } - } - } - - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - EOL(); - TXT( "\nType : " ); - ENM( 
src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { - TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); - } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { - TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); - } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { - TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); - } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { - TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); - } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); - } - } + SID( src->Register.Index ); } } } @@ -653,12 +405,6 @@ tgsi_dump_c( TXT( "tgsi-dump begin -----------------" ); - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); - EOL(); - TXT( "\nHeaderSize: " ); UID( parse.FullHeader.Header.HeaderSize ); TXT( "\nBodySize : " ); @@ -678,10 +424,6 @@ tgsi_dump_c( if( ignored ) { TXT( "\nSize : " ); UID( parse.FullToken.Token.NrTokens ); - if( deflt || parse.FullToken.Token.Extended ) { - TXT( "\nExtended : " ); - UID( parse.FullToken.Token.Extended ); - } } switch( parse.FullToken.Token.Type ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c79c56debd..22984c3232 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -62,9 +62,6 @@ #define FAST_MATH 1 -/** for 
tgsi_full_instruction::Flags */ -#define SOA_DEPENDENCY_FLAG 0x1 - #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 @@ -107,12 +104,13 @@ #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_P0 TGSI_EXEC_TEMP_P0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ @@ -190,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) { uint i, chan; - uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint writemask = inst->Dst[0].Register.WriteMask; if (writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_Y || writemask == TGSI_WRITEMASK_Z || @@ -202,17 +200,16 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->FullSrcRegisters[i].SrcRegister.File == - inst->FullDstRegisters[0].DstRegister.File) && - (inst->FullSrcRegisters[i].SrcRegister.Index == - inst->FullDstRegisters[0].DstRegister.Index)) { + if ((inst->Src[i].Register.File == + inst->Dst[0].Register.File) && + (inst->Src[i].Register.Index == + inst->Dst[0].Register.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); - if (swizzle <= TGSI_SWIZZLE_W && - (channelsWritten & (1 << swizzle))) { + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); + if 
(channelsWritten & (1 << swizzle)) { return TRUE; } @@ -332,20 +329,6 @@ tgsi_exec_machine_bind_shader( maxInstructions += 10; } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; - /* XXX we only handle SOA dependencies properly for MOV/SWZ - * at this time! - */ - if (opcode != TGSI_OPCODE_MOV && opcode != TGSI_OPCODE_SWZ) { - debug_printf("Warning: SOA dependency in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, - numInstructions); - } - } - memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); @@ -386,6 +369,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -517,7 +501,7 @@ micro_ddy( dst->f[0] = dst->f[1] = dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; } static void @@ -595,6 +579,24 @@ micro_exp2( dst->f[2] = util_fast_exp2( src->f[2] ); dst->f[3] = util_fast_exp2( src->f[3] ); #else + +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif + dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); @@ -1130,10 +1132,10 @@ fetch_src_file_channel( union tgsi_exec_channel *chan ) { switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); @@ -1188,6 +1190,17 @@ fetch_src_file_channel( chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; break; + case TGSI_FILE_PREDICATE: + assert(index->i[0] < TGSI_EXEC_NUM_PREDS); + assert(index->i[1] < TGSI_EXEC_NUM_PREDS); + assert(index->i[2] < TGSI_EXEC_NUM_PREDS); + assert(index->i[3] < TGSI_EXEC_NUM_PREDS); + chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; + chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; + chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; + chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; + break; + case TGSI_FILE_OUTPUT: /* vertex/fragment output vars can be read too */ chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; @@ -1201,14 +1214,6 @@ fetch_src_file_channel( } break; - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - default: assert( 0 ); } @@ -1228,13 +1233,13 @@ fetch_source( * * file[1], * where: - * file = SrcRegister.File - * [1] = SrcRegister.Index + * file = Register.File + * [1] = Register.Index */ index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; /* There is an extra 
source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1242,11 +1247,11 @@ fetch_source( * * file[ind[2].x+1], * where: - * ind = SrcRegisterInd.File - * [2] = SrcRegisterInd.Index - * .x = SrcRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1256,13 +1261,13 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -1288,14 +1293,14 @@ fetch_source( * * file[1][3] == file[1*sizeof(file[1])+3], * where: - * [3] = SrcRegisterDim.Index + * [3] = Dimension.Index */ - if (reg->SrcRegister.Dimension) { + if (reg->Register.Dimension) { /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. 
*/ - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; @@ -1312,10 +1317,10 @@ fetch_source( assert( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. @@ -1324,11 +1329,11 @@ fetch_source( * * file[1][ind[4].y+3], * where: - * ind = SrcRegisterDimInd.File - * [4] = SrcRegisterDimInd.Index - * .y = SrcRegisterDimInd.SwizzleX + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX */ - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1337,12 +1342,12 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -1362,15 +1367,15 @@ fetch_source( } /* If by any chance there was a need for a 3D array of register - * files, we would have to check whether SrcRegisterDim is followed + * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. 
*/ } - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -1392,10 +1397,6 @@ fetch_source( case TGSI_UTIL_SIGN_KEEP: break; } - - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); - } } static void @@ -1423,11 +1424,11 @@ store_dest( * * file[ind[2].x+1], * where: - * ind = DstRegisterInd.File - * [2] = DstRegisterInd.Index - * .x = DstRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->DstRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index; union tgsi_exec_channel indir_index; uint swizzle; @@ -1436,15 +1437,15 @@ store_dest( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->DstRegisterInd.Index; + index.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ fetch_src_file_channel( mach, - reg->DstRegisterInd.File, + reg->Indirect.File, swizzle, &index, &indir_index ); @@ -1453,138 +1454,84 @@ store_dest( offset = (int) indir_index.f[0]; } - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; break; case TGSI_FILE_OUTPUT: index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index; + + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert( index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[offset + index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - index = reg->DstRegister.Index; + 
index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_LOOP: + assert(reg->Register.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; + + case TGSI_FILE_PREDICATE: + index = reg->Register.Index; + assert(index < TGSI_EXEC_NUM_PREDS); + dst = &mach->Predicates[index].xyzw[chan_index]; + break; + default: assert( 0 ); return; } - if (inst->InstructionExtNv.CondFlowEnable) { - union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; + if (inst->Instruction.Predicate) { uint swizzle; - uint shift; - uint mask; - uint test; - - /* Only CC0 supported. - */ - assert( inst->InstructionExtNv.CondFlowIndex < 1 ); + union tgsi_exec_channel *pred; switch (chan_index) { case CHAN_X: - swizzle = inst->InstructionExtNv.CondSwizzleX; + swizzle = inst->Predicate.SwizzleX; break; case CHAN_Y: - swizzle = inst->InstructionExtNv.CondSwizzleY; + swizzle = inst->Predicate.SwizzleY; break; case CHAN_Z: - swizzle = inst->InstructionExtNv.CondSwizzleZ; + swizzle = inst->Predicate.SwizzleZ; break; case CHAN_W: - swizzle = inst->InstructionExtNv.CondSwizzleW; - break; - default: - assert( 0 ); - return; - } - - switch (swizzle) { - case TGSI_SWIZZLE_X: - shift = TGSI_EXEC_CC_X_SHIFT; - mask = TGSI_EXEC_CC_X_MASK; - break; - case TGSI_SWIZZLE_Y: - shift = TGSI_EXEC_CC_Y_SHIFT; - mask = TGSI_EXEC_CC_Y_MASK; - break; - case TGSI_SWIZZLE_Z: - shift = TGSI_EXEC_CC_Z_SHIFT; - mask = TGSI_EXEC_CC_Z_MASK; - break; - case TGSI_SWIZZLE_W: - shift = TGSI_EXEC_CC_W_SHIFT; - mask = TGSI_EXEC_CC_W_MASK; + swizzle = inst->Predicate.SwizzleW; break; default: - assert( 0 ); + assert(0); return; } - switch (inst->InstructionExtNv.CondMask) { - case TGSI_CC_GT: - test = ~(TGSI_EXEC_CC_GT << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) - execmask &= ~(1 << i); - break; - - case TGSI_CC_EQ: - test = 
~(TGSI_EXEC_CC_EQ << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) - execmask &= ~(1 << i); - break; - - case TGSI_CC_LT: - test = ~(TGSI_EXEC_CC_LT << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) - execmask &= ~(1 << i); - break; + assert(inst->Predicate.Index == 0); - case TGSI_CC_GE: - test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) - execmask &= ~(1 << i); - break; + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; - case TGSI_CC_LE: - test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) + if (inst->Predicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { execmask &= ~(1 << i); - break; - - case TGSI_CC_NE: - test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; - for (i = 0; i < QUAD_SIZE; i++) - if (cc->u[i] & test) + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { execmask &= ~(1 << i); - break; - - case TGSI_CC_TR: - break; - - case TGSI_CC_FL: - for (i = 0; i < QUAD_SIZE; i++) - execmask &= ~(1 << i); - break; - - default: - assert( 0 ); - return; + } + } } } @@ -1622,58 +1569,13 @@ store_dest( default: assert( 0 ); } - - if (inst->InstructionExtNv.CondDstUpdate) { - union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; - uint shift; - uint mask; - - /* Only CC0 supported. 
- */ - assert( inst->InstructionExtNv.CondDstIndex < 1 ); - - switch (chan_index) { - case CHAN_X: - shift = TGSI_EXEC_CC_X_SHIFT; - mask = ~TGSI_EXEC_CC_X_MASK; - break; - case CHAN_Y: - shift = TGSI_EXEC_CC_Y_SHIFT; - mask = ~TGSI_EXEC_CC_Y_MASK; - break; - case CHAN_Z: - shift = TGSI_EXEC_CC_Z_SHIFT; - mask = ~TGSI_EXEC_CC_Z_MASK; - break; - case CHAN_W: - shift = TGSI_EXEC_CC_W_SHIFT; - mask = ~TGSI_EXEC_CC_W_MASK; - break; - default: - assert( 0 ); - return; - } - - for (i = 0; i < QUAD_SIZE; i++) - if (execmask & (1 << i)) { - cc->u[i] &= mask; - if (dst->f[i] < 0.0f) - cc->u[i] |= TGSI_EXEC_CC_LT << shift; - else if (dst->f[i] > 0.0f) - cc->u[i] |= TGSI_EXEC_CC_GT << shift; - else if (dst->f[i] == 0.0f) - cc->u[i] |= TGSI_EXEC_CC_EQ << shift; - else - cc->u[i] |= TGSI_EXEC_CC_UN << shift; - } - } } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -1689,10 +1591,8 @@ exec_kil(struct tgsi_exec_machine *mach, uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ union tgsi_exec_channel r[1]; - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + /* This mask stores component bits that were already tested. 
*/ + uniquemask = 0; for (chan_index = 0; chan_index < 4; chan_index++) { @@ -1700,8 +1600,8 @@ exec_kil(struct tgsi_exec_machine *mach, uint i; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( - &inst->FullSrcRegisters[0], + swizzle = tgsi_util_get_full_src_register_swizzle ( + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -1728,32 +1628,8 @@ exec_kilp(struct tgsi_exec_machine *mach, { uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - if (inst->InstructionExtNv.CondFlowEnable) { - uint swizzle[4]; - uint chan_index; - - kilmask = 0x0; - - swizzle[0] = inst->InstructionExtNv.CondSwizzleX; - swizzle[1] = inst->InstructionExtNv.CondSwizzleY; - swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; - swizzle[3] = inst->InstructionExtNv.CondSwizzleW; - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint i; - - for (i = 0; i < 4; i++) { - /* TODO: evaluate the condition code */ - if (0) - kilmask |= 1 << i; - } - } - } - else { - /* "unconditional" kil */ - kilmask = mach->ExecMask; - } + /* "unconditional" kil */ + kilmask = mach->ExecMask; mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } @@ -1792,14 +1668,14 @@ exec_tex(struct tgsi_exec_machine *mach, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1885,6 +1761,64 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + union tgsi_exec_channel r[4]; + uint chan_index; + + /* + * XXX: This is fake TXD -- the 
derivatives are not taken into account, yet. + */ + + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} + /** * Evaluate a constant-valued coefficient at the position of the @@ -1954,53 +1888,58 @@ typedef void (* eval_coef_func)( unsigned chan ); static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { + uint first, last, mask; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; - - case TGSI_INTERPOLATE_LINEAR: - eval = 
eval_linear_coef; - break; + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); + assert(first == last); + assert(mask = TGSI_WRITEMASK_XYZW); - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; + mach->Inputs[first] = mach->QuadPos; + } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + uint i; - default: - eval = NULL; - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; + assert(decl->Semantic.Index == 0); + assert(first == last); - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; + + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert(0); + return; } - } - else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); } } } @@ -2017,6 +1956,7 @@ exec_instruction( { uint chan_index; union tgsi_exec_channel r[10]; + union tgsi_exec_channel d[8]; (*pc)++; @@ -2025,43 +1965,27 @@ exec_instruction( case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_flr(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - if (inst->Flags & SOA_DEPENDENCY_FLAG) { - /* Do all fetches into temp regs, then do all stores to avoid - * 
intermediate/accidental clobbering. This could be done all the - * time for MOV but for other instructions we'll need more temps... - */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[chan_index], 0, chan_index ); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&d[chan_index], 0, chan_index); } - else { - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); + micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { @@ -2072,11 +1996,19 @@ exec_instruction( micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); + micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } - } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, 
CHAN_W ); } @@ -2144,14 +2076,13 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_mul(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2159,8 +2090,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2216,25 +2149,29 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH(&d[CHAN_W], 1, CHAN_W); + } + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_W], 0, CHAN_W); } break; @@ -2244,9 +2181,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? 
*/ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2256,9 +2194,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2267,8 +2206,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2277,8 +2218,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2289,8 +2232,10 @@ exec_instruction( FETCH( &r[1], 1, chan_index ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2298,10 +2243,10 @@ 
exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_sub(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2310,12 +2255,12 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_add(&d[chan_index], &r[0], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2324,8 +2269,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2350,8 +2297,10 @@ exec_instruction( case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_frc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2361,8 +2310,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); micro_max(&r[0], &r[0], &r[1]); FETCH(&r[1], 2, chan_index); - micro_min(&r[0], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_min(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2370,19 +2321,17 @@ exec_instruction( case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, 
chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_rnd(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); -#if FAST_MATH micro_exp2( &r[0], &r[0] ); -#else - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); -#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -2418,11 +2367,7 @@ exec_instruction( FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } + micro_sub(&d[CHAN_X], &r[2], &r[5]); FETCH(&r[2], 1, CHAN_X); @@ -2431,20 +2376,21 @@ exec_instruction( FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } + micro_sub(&d[CHAN_Y], &r[3], &r[1]); micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -2453,11 +2399,11 @@ exec_instruction( case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); + micro_abs(&d[chan_index], &r[0]); } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); + } break; case TGSI_OPCODE_RCC: @@ -2509,16 +2455,20 @@ exec_instruction( case TGSI_OPCODE_DDX: 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddx(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_DDY: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddy(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2599,10 +2549,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2616,8 +2566,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2633,8 +2585,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 
&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2642,8 +2596,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2673,7 +2629,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + exec_txd(mach, inst); break; case TGSI_OPCODE_TXL: @@ -2717,13 +2673,8 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_X); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&r[2], 0, CHAN_Z); - } + micro_add(&d[CHAN_X], &r[2], &r[3]); + } if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_CHANNEL_ENABLED(*inst, CHAN_W)) { @@ -2733,13 +2684,20 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_Y); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&r[2], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&r[2], 0, CHAN_W); - } + micro_add(&d[CHAN_Y], &r[2], &r[3]); + + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_X], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_Y], 0, CHAN_W); } break; @@ -2782,7 +2740,7 @@ exec_instruction( 
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ - *pc = inst->InstructionExtLabel.Label; + *pc = inst->Label.Label; } break; @@ -2824,8 +2782,10 @@ exec_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_sgn( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_sgn(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2834,10 +2794,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3012,32 +2972,40 @@ exec_instruction( case TGSI_OPCODE_CEIL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ceil(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_I2F: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_i2f(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_NOT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_not(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_TRUNC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - 
micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_trunc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3045,8 +3013,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_shl(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3054,8 +3024,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_ishr(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3063,8 +3035,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_and(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3072,8 +3046,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_or(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -3085,8 +3061,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_xor(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, 
chan_index); } break; @@ -3117,8 +3095,23 @@ exec_instruction( for (chan_index = 0; chan_index < 3; chan_index++) { FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); } - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); ++mach->LoopCounterStackTop; + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); + /* update LoopMask */ + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { + mach->LoopMask &= ~0x1; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { + mach->LoopMask &= ~0x2; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { + mach->LoopMask &= ~0x4; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { + mach->LoopMask &= ~0x8; + } + /* TODO: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3132,28 +3125,28 @@ exec_instruction( case TGSI_OPCODE_ENDFOR: assert(mach->LoopCounterStackTop > 0); - micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); /* update LoopMask */ - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { mach->LoopMask &= ~0x1; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { mach->LoopMask 
&= ~0x2; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { mach->LoopMask &= ~0x4; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { mach->LoopMask &= ~0x8; } - micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); assert(mach->LoopLabelStackTop > 0); inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; @@ -3220,23 +3213,28 @@ exec_instruction( break; case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; + /* + * XXX: This really should be a no-op. We should never reach this opcode. 
+ */ - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; + assert(mach->CallStackTop > 0); + mach->CallStackTop--; - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; - case TGSI_OPCODE_NOISE4: - assert( 0 ); + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_NOP: @@ -3247,6 +3245,8 @@ exec_instruction( } } +#define DEBUG_EXECUTION 0 + /** * Run TGSI interpreter. @@ -3290,10 +3290,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) exec_declaration( mach, mach->Declarations+i ); } - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); + { +#if DEBUG_EXECUTION + struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; + uint inst = 1; + + memcpy(temps, mach->Temps, sizeof(temps)); + memcpy(outputs, mach->Outputs, sizeof(outputs)); +#endif + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + +#if DEBUG_EXECUTION + uint i; + + tgsi_dump_instruction(&mach->Instructions[pc], inst++); +#endif + + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); + +#if DEBUG_EXECUTION + for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { + if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { + uint j; + + 
memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); + debug_printf("TEMP[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f, %6f, %6f, %6f)\n", + temps[i].xyzw[0].f[j], + temps[i].xyzw[1].f[j], + temps[i].xyzw[2].f[j], + temps[i].xyzw[3].f[j]); + } + } + } + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("{%6f, %6f, %6f, %6f}\n", + outputs[i].xyzw[0].f[j], + outputs[i].xyzw[1].f[j], + outputs[i].xyzw[2].f[j], + outputs[i].xyzw[3].f[j]); + } + } + } +#endif + } } #if 0 @@ -3307,5 +3364,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index c72f76809d..fd94c1bc44 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -168,13 +168,18 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) #define TGSI_EXEC_NUM_ADDRS 1 -#define TGSI_EXEC_NUM_TEMP_EXTRAS 9 +/* predicate register */ +#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) +#define TGSI_EXEC_NUM_PREDS 1 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 10 -#define TGSI_EXEC_MAX_COND_NESTING 20 -#define TGSI_EXEC_MAX_LOOP_NESTING 20 -#define TGSI_EXEC_MAX_CALL_NESTING 20 + + +#define TGSI_EXEC_MAX_COND_NESTING 32 +#define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. 
For 2D * input register files, this is the stride between two 1D @@ -213,6 +218,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector *Addrs; + struct tgsi_exec_vector *Predicates; struct tgsi_sampler **Samplers; @@ -227,6 +233,7 @@ struct tgsi_exec_machine /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; struct tgsi_exec_vector QuadPos; + float Face; /**< +1 if front facing, -1 if back facing */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ @@ -247,7 +254,7 @@ struct tgsi_exec_machine uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopLabelStackTop; - /** Loop counter stack (x = count, y = current, z = step) */ + /** Loop counter stack (x = index, y = counter, z = step) */ struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopCounterStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 17af4cb7ad..be375cabb8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -134,10 +134,10 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, 0, 0, "", 103 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 104 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 105 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 106 }, /* removed */ { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ @@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 
0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c index d88c2558d8..7b384f5e12 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -39,7 +39,6 @@ tgsi_iterate_shader( return FALSE; ctx->processor = parse.FullHeader.Processor; - ctx->version = parse.FullVersion.Version; if (ctx->prolog) if (!ctx->prolog( ctx )) diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h index ec7b85bf63..ef5a33ebce 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.h +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h @@ -61,7 +61,6 @@ struct tgsi_iterate_context struct tgsi_iterate_context *ctx ); struct tgsi_processor processor; - struct tgsi_version version; }; boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e7bcf4bf75..b34263da48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -139,10 +139,6 @@ OP00_LBL(BGNLOOP) OP00(BGNSUB) OP00_LBL(ENDLOOP) OP00(ENDSUB) -OP11(NOISE1) -OP11(NOISE2) -OP11(NOISE3) -OP11(NOISE4) OP00(NOP) OP11(NRM4) OP01(CALLNZ) @@ -150,7 +146,6 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) -OP11(SWZ) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 4870f82b6b..8f2b6a307d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -28,44 +28,23 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" -#include "tgsi_build.h" #include "util/u_memory.h" -void -tgsi_full_token_init( - union tgsi_full_token 
*full_token ) -{ - full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; -} - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ) -{ -} - unsigned tgsi_parse_init( struct tgsi_parse_context *ctx, const struct tgsi_token *tokens ) { - ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; - if( ctx->FullVersion.Version.MajorVersion > 1 ) { - return TGSI_PARSE_ERROR; - } - - ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[0]; if( ctx->FullHeader.Header.HeaderSize >= 2 ) { - ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[1]; } else { - ctx->FullHeader.Processor = tgsi_default_processor(); + return TGSI_PARSE_ERROR; } ctx->Tokens = tokens; - ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; - - tgsi_full_token_init( &ctx->FullToken ); + ctx->Position = ctx->FullHeader.Header.HeaderSize; return TGSI_PARSE_OK; } @@ -74,7 +53,6 @@ void tgsi_parse_free( struct tgsi_parse_context *ctx ) { - tgsi_full_token_free( &ctx->FullToken ); } boolean @@ -82,7 +60,7 @@ tgsi_parse_end_of_tokens( struct tgsi_parse_context *ctx ) { return ctx->Position >= - 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; } @@ -119,9 +97,6 @@ tgsi_parse_token( struct tgsi_token token; unsigned i; - tgsi_full_token_free( &ctx->FullToken ); - tgsi_full_token_init( &ctx->FullToken ); - next_token( ctx, &token ); switch( token.Type ) { @@ -129,10 +104,10 @@ tgsi_parse_token( { struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; - *decl = tgsi_default_full_declaration(); + memset(decl, 0, sizeof *decl); copy_token(&decl->Declaration, &token); - next_token( ctx, &decl->DeclarationRange ); + next_token( ctx, &decl->Range ); if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); @@ -145,9 +120,8 @@ 
tgsi_parse_token( { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; - *imm = tgsi_default_full_immediate(); + memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); - assert( !imm->Immediate.Extended ); switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: @@ -169,147 +143,76 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_INSTRUCTION: { struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; - unsigned extended; - *inst = tgsi_default_full_instruction(); + memset(inst, 0, sizeof *inst); copy_token(&inst->Instruction, &token); - extended = inst->Instruction.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_INSTRUCTION_EXT_TYPE_NV: - copy_token(&inst->InstructionExtNv, &token); - break; - - case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - copy_token(&inst->InstructionExtLabel, &token); - break; - - case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - copy_token(&inst->InstructionExtTexture, &token); - break; + if (inst->Instruction.Predicate) { + next_token(ctx, &inst->Predicate); + } - default: - assert( 0 ); - } + if (inst->Instruction.Label) { + next_token( ctx, &inst->Label); + } - extended = token.Extended; + if (inst->Instruction.Texture) { + next_token( ctx, &inst->Texture); } assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - unsigned extended; - next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + next_token( ctx, &inst->Dst[i].Register ); /* * No support for indirect or multi-dimensional addressing. 
*/ - assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); - - extended = inst->FullDstRegisters[i].DstRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode, - &token); - break; - - case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } + assert( !inst->Dst[i].Register.Dimension ); - if( inst->FullDstRegisters[i].DstRegister.Indirect ) { - next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd ); + if( inst->Dst[i].Register.Indirect ) { + next_token( ctx, &inst->Dst[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Extended ); + assert( !inst->Dst[i].Indirect.Dimension ); + assert( !inst->Dst[i].Indirect.Indirect ); } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - unsigned extended; - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); - - extended = inst->FullSrcRegisters[i].SrcRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, - &token); - break; - - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } + next_token( ctx, &inst->Src[i].Register ); - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { - next_token( ctx, 
&inst->FullSrcRegisters[i].SrcRegisterInd ); + if( inst->Src[i].Register.Indirect ) { + next_token( ctx, &inst->Src[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } - if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + if( inst->Src[i].Register.Dimension ) { + next_token( ctx, &inst->Src[i].Dimension ); /* * No support for multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + assert( !inst->Src[i].Dimension.Dimension ); - if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + if( inst->Src[i].Dimension.Indirect ) { + next_token( ctx, &inst->Src[i].DimIndirect ); /* * No support for indirect or multi-dimensional addressing. 
*/ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } } } @@ -329,8 +232,7 @@ tgsi_num_tokens(const struct tgsi_token *tokens) struct tgsi_parse_context ctx; if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { unsigned len = (ctx.FullHeader.Header.HeaderSize + - ctx.FullHeader.Header.BodySize + - 1); + ctx.FullHeader.Header.BodySize); return len; } return 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index a26ee5ba86..3aa1979a63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -34,11 +34,6 @@ extern "C" { #endif -struct tgsi_full_version -{ - struct tgsi_version Version; -}; - struct tgsi_full_header { struct tgsi_header Header; @@ -47,26 +42,22 @@ struct tgsi_full_header struct tgsi_full_dst_register { - struct tgsi_dst_register DstRegister; - struct tgsi_src_register DstRegisterInd; - struct tgsi_dst_register_ext_concode DstRegisterExtConcode; - struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; + struct tgsi_dst_register Register; + struct tgsi_src_register Indirect; }; struct tgsi_full_src_register { - struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_swz SrcRegisterExtSwz; - struct tgsi_src_register_ext_mod SrcRegisterExtMod; - struct tgsi_src_register SrcRegisterInd; - struct tgsi_dimension SrcRegisterDim; - struct tgsi_src_register SrcRegisterDimInd; + struct tgsi_src_register Register; + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_declaration { struct tgsi_declaration Declaration; - struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_range Range; struct tgsi_declaration_semantic 
Semantic; }; @@ -82,12 +73,11 @@ struct tgsi_full_immediate struct tgsi_full_instruction { struct tgsi_instruction Instruction; - struct tgsi_instruction_ext_nv InstructionExtNv; - struct tgsi_instruction_ext_label InstructionExtLabel; - struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; - struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; - uint Flags; /**< user-defined usage */ + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; + struct tgsi_instruction_texture Texture; + struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS]; }; union tgsi_full_token @@ -98,19 +88,10 @@ union tgsi_full_token struct tgsi_full_instruction FullInstruction; }; -void -tgsi_full_token_init( - union tgsi_full_token *full_token ); - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ); - struct tgsi_parse_context { const struct tgsi_token *Tokens; unsigned Position; - struct tgsi_full_version FullVersion; struct tgsi_full_header FullHeader; union tgsi_full_token FullToken; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 4b1c7d4e01..da6ad6da04 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -156,8 +156,8 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) static boolean is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) { - return (reg->SrcRegister.File 
== TGSI_FILE_TEMPORARY && - reg->SrcRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -167,8 +167,8 @@ is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) static boolean is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) { - return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -283,18 +283,18 @@ emit_fetch(struct gen_context *gen, const struct tgsi_full_src_register *reg, const unsigned chan_index) { - uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); int dst_vec = -1; switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (reg->SrcRegister.File) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + switch (reg->Register.File) { case TGSI_FILE_INPUT: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); @@ -303,11 +303,11 @@ emit_fetch(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ - dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; + dst_vec = gen->temps_map[reg->Register.Index][swizzle]; } else { /* use memory-based temp register "file" */ - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); @@ -315,7 +315,7 @@ 
emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -331,7 +331,7 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_CONSTANT: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -349,16 +349,6 @@ emit_fetch(struct gen_context *gen, assert( 0 ); } break; - case TGSI_EXTSWIZZLE_ZERO: - ppc_vzero(gen->f, dst_vec); - break; - case TGSI_EXTSWIZZLE_ONE: - { - int one_vec = gen_one_vec(gen); - dst_vec = ppc_allocate_vec_register(gen->f); - ppc_vmove(gen->f, dst_vec, one_vec); - } - break; default: assert( 0 ); } @@ -414,12 +404,12 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, { int swz_a, swz_b; int sign_a, sign_b; - if (a->SrcRegister.File != b->SrcRegister.File) + if (a->Register.File != b->Register.File) return FALSE; - if (a->SrcRegister.Index != b->SrcRegister.Index) + if (a->Register.Index != b->Register.Index) return FALSE; - swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a); - swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b); + swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); + swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); if (swz_a != swz_b) return FALSE; sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); @@ -441,7 +431,7 @@ get_src_vec(struct gen_context *gen, struct tgsi_full_instruction *inst, int src_reg, uint chan) { const const struct tgsi_full_src_register *src = - &inst->FullSrcRegisters[src_reg]; + &inst->Src[src_reg]; int vec; uint i; @@ -492,10 +482,10 @@ get_dst_vec(struct gen_context *gen, 
const struct tgsi_full_instruction *inst, unsigned chan_index) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; if (is_ppc_vec_temporary_dst(reg)) { - int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int vec = gen->temps_map[reg->Register.Index][chan_index]; return vec; } else { @@ -515,12 +505,12 @@ emit_store(struct gen_context *gen, unsigned chan_index, boolean free_vec) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_OUTPUT: { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); } @@ -528,14 +518,14 @@ emit_store(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary_dst(reg)) { if (!free_vec) { - int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; if (dst_vec != src_vec) ppc_vmove(gen->f, dst_vec, src_vec); } free_vec = FALSE; } else { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); } @@ -545,7 +535,7 @@ emit_store(struct gen_context *gen, emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; #endif @@ -635,7 +625,6 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: if (v0 != v1) ppc_vmove(gen->f, v1, v0); break; @@ -1119,7 +1108,6 @@ emit_instruction(struct gen_context *gen, switch 
(inst->Instruction.Opcode) { case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: case TGSI_OPCODE_ABS: case TGSI_OPCODE_FLR: case TGSI_OPCODE_FRC: @@ -1190,8 +1178,8 @@ emit_declaration( unsigned first, last, mask; unsigned i, j; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 53e13b30e6..b5d1faa897 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ typedef uint reg_flag; #define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) -#define MAX_REGISTERS 256 +#define MAX_REGISTERS 1024 #define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) struct sanity_check_ctx @@ -141,7 +141,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP" + "LOOP", + "PRED" }; static boolean @@ -211,24 +212,24 @@ iter_instruction( for (i = 0; i < inst->Instruction.NumDstRegs; i++) { check_register_usage( ctx, - inst->FullDstRegisters[i].DstRegister.File, - inst->FullDstRegisters[i].DstRegister.Index, + inst->Dst[i].Register.File, + inst->Dst[i].Register.Index, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { check_register_usage( ctx, - inst->FullSrcRegisters[i].SrcRegister.File, - inst->FullSrcRegisters[i].SrcRegister.Index, + inst->Src[i].Register.File, + inst->Src[i].Register.Index, "source", - (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); - if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + (boolean)inst->Src[i].Register.Indirect ); + if (inst->Src[i].Register.Indirect) { uint file; int index; - file = inst->FullSrcRegisters[i].SrcRegisterInd.File; - index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + file = inst->Src[i].Indirect.File; + index = 
inst->Src[i].Indirect.Index; check_register_usage( ctx, file, @@ -244,8 +245,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: case TGSI_OPCODE_ENDFOR: - if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP || - inst->FullDstRegisters[0].DstRegister.Index != 0) { + if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || + inst->Dst[0].Register.Index != 0) { report_error(ctx, "Destination register must be LOOP[0]"); } break; @@ -253,8 +254,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: - if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT && - inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { + if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && + inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { report_error(ctx, "Source register file must be either CONST or IMM"); } break; @@ -285,7 +286,7 @@ iter_declaration( file = decl->Declaration.File; if (!check_file_name( ctx, file )) return TRUE; - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { if (is_register_declared( ctx, file, i )) report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index c535788819..a5d2db04ec 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -35,7 +35,6 @@ #include "util/u_math.h" -#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -97,14 +96,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, uint i; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[i]; - if (src->SrcRegister.File == TGSI_FILE_INPUT) { - const 
int ind = src->SrcRegister.Index; + &fullinst->Src[i]; + if (src->Register.File == TGSI_FILE_INPUT) { + const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) { + if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { info->uses_fogcoord = TRUE; } - else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) { + else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { info->uses_frontfacing = TRUE; } } @@ -120,8 +119,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; const uint file = fulldecl->Declaration.File; uint reg; - for (reg = fulldecl->DeclarationRange.First; - reg <= fulldecl->DeclarationRange.Last; + for (reg = fulldecl->Range.First; + reg <= fulldecl->Range.Last; reg++) { /* only first 32 regs will appear in this bitfield */ @@ -130,20 +129,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; + info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; info->num_inputs++; } else if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->num_outputs++; } /* special case */ if (procType == TGSI_PROCESSOR_FRAGMENT && file == TGSI_FILE_OUTPUT && - fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { info->writes_z = TRUE; } } 
@@ -205,34 +205,25 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[0]; + &fullinst->Src[0]; const struct tgsi_full_dst_register *dst = - &fullinst->FullDstRegisters[0]; + &fullinst->Dst[0]; /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->SrcRegister.File != TGSI_FILE_INPUT || - dst->DstRegister.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->DstRegister.Index || - - src->SrcRegister.Negate || - src->SrcRegisterExtMod.Negate || - src->SrcRegisterExtMod.Absolute || - src->SrcRegisterExtMod.Scale2X || - src->SrcRegisterExtMod.Bias || - src->SrcRegisterExtMod.Complement || - - src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - - src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || - - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + src->Register.File != TGSI_FILE_INPUT || + dst->Register.File != TGSI_FILE_OUTPUT || + src->Register.Index != dst->Register.Index || + + src->Register.Negate || + src->Register.Absolute || + + src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W || + + dst->Register.WriteMask != TGSI_WRITEMASK_XYZW) { tgsi_parse_free(&parse); return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 2c1a75bc81..8a7ee0c7e4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte num_outputs; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 501fc05e72..76051ea0d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -58,7 +58,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -1260,30 +1260,30 @@ emit_fetch( const struct tgsi_full_src_register *reg, const unsigned chan_index ) { - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (reg->SrcRegister.File) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: emit_const( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle, - reg->SrcRegister.Indirect, - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.Index ); + reg->Register.Indirect, + reg->Indirect.File, + reg->Indirect.Index ); break; case TGSI_FILE_IMMEDIATE: emit_immediate( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1291,7 +1291,7 @@ emit_fetch( emit_inputf( func, xmm, - reg->SrcRegister.Index, + 
reg->Register.Index, swizzle ); break; @@ -1299,7 +1299,7 @@ emit_fetch( emit_tempf( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1308,22 +1308,6 @@ emit_fetch( } break; - case TGSI_EXTSWIZZLE_ZERO: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - break; - - case TGSI_EXTSWIZZLE_ONE: - emit_tempf( - func, - xmm, - TEMP_ONE_I, - TEMP_ONE_C ); - break; - default: assert( 0 ); } @@ -1347,7 +1331,7 @@ emit_fetch( } #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + emit_fetch( FUNC, XMM, &(INST).Src[INDEX], CHAN ) /** * Register store. @@ -1387,12 +1371,12 @@ emit_store( } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: emit_output( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1400,7 +1384,7 @@ emit_store( emit_temps( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1408,7 +1392,7 @@ emit_store( emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1418,7 +1402,7 @@ emit_store( } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + emit_store( FUNC, XMM, &(INST).Dst[INDEX], &(INST), CHAN ) static void PIPE_CDECL @@ -1445,11 +1429,11 @@ fetch_texel( struct tgsi_sampler **sampler, { float rgba[NUM_CHANNELS][QUAD_SIZE]; (*sampler)->get_samples(*sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); + &store[0], /* s */ + &store[4], /* t */ + &store[8], /* r */ + store[12], /* lodbias */ + rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); } @@ -1475,12 +1459,13 @@ emit_tex( struct x86_function *func, boolean lodbias, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; struct 
x86_reg args[2]; unsigned count; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + assert(inst->Instruction.Texture); + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: count = 1; break; @@ -1582,13 +1567,13 @@ emit_kil( /* This mask stores component bits that were already tested. Note that * we test if the value is less than zero, so 1.0 and 0.0 need not to be * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + uniquemask = 0; FOR_EACH_CHANNEL( chan_index ) { unsigned swizzle; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); @@ -1735,15 +1720,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; @@ -1772,7 +1757,6 @@ emit_instruction( break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 4 + chan_index, 0, chan_index ); } @@ -1855,7 +1839,6 @@ emit_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ FETCH( func, *inst, 0, 0, CHAN_X ); emit_rcp( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1864,7 +1847,6 @@ emit_instruction( break; case 
TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); @@ -1962,7 +1944,6 @@ emit_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); emit_mul( func, 0, 1 ); @@ -1980,7 +1961,6 @@ emit_instruction( break; case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); emit_mul( func, 0, 1 ); @@ -2051,17 +2031,14 @@ emit_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ emit_setcc( func, inst, cc_LessThan ); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ emit_setcc( func, inst, cc_NotLessThan ); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); @@ -2267,7 +2244,7 @@ emit_instruction( case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( func, &inst->FullSrcRegisters[0] ); + emit_kil( func, &inst->Src[0] ); break; case TGSI_OPCODE_PK2H: @@ -2291,7 +2268,7 @@ emit_instruction( break; case TGSI_OPCODE_SEQ: - return 0; + emit_setcc( func, inst, cc_Equal ); break; case TGSI_OPCODE_SFL: @@ -2299,7 +2276,7 @@ emit_instruction( break; case TGSI_OPCODE_SGT: - return 0; + emit_setcc( func, inst, cc_NotLessThanEqual ); break; case TGSI_OPCODE_SIN: @@ -2311,11 +2288,11 @@ emit_instruction( break; case TGSI_OPCODE_SLE: - return 0; + emit_setcc( func, inst, cc_LessThanEqual ); break; case TGSI_OPCODE_SNE: - return 0; + emit_setcc( func, inst, cc_NotEqual ); break; case TGSI_OPCODE_STR: @@ -2379,7 +2356,6 @@ emit_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_sgn( func, 0, 0 ); @@ -2661,8 +2637,8 @@ emit_declaration( unsigned first, last, mask; unsigned i, j; - first = 
decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { @@ -2946,8 +2922,7 @@ tgsi_emit_sse2( * the result in the cases where the code is too opaque to * fix. */ - if (opcode != TGSI_OPCODE_MOV && - opcode != TGSI_OPCODE_SWZ) { + if (opcode != TGSI_OPCODE_MOV) { debug_printf("Warning: src/dst aliasing in instruction" " is not handled:\n"); tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index d438450b1e..eb376fa957 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -172,19 +172,19 @@ static void report_error( struct translate_ctx *ctx, const char *msg ) /* Parse shader header. * Return TRUE for one of the following headers. - * FRAG1.1 - * GEOM1.1 - * VERT1.1 + * FRAG + * GEOM + * VERT */ static boolean parse_header( struct translate_ctx *ctx ) { uint processor; - if (str_match_no_case( &ctx->cur, "FRAG1.1" )) + if (str_match_no_case( &ctx->cur, "FRAG" )) processor = TGSI_PROCESSOR_FRAGMENT; - else if (str_match_no_case( &ctx->cur, "VERT1.1" )) + else if (str_match_no_case( &ctx->cur, "VERT" )) processor = TGSI_PROCESSOR_VERTEX; - else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) + else if (str_match_no_case( &ctx->cur, "GEOM" )) processor = TGSI_PROCESSOR_GEOMETRY; else { report_error( ctx, "Unknown header" ); @@ -193,10 +193,6 @@ static boolean parse_header( struct translate_ctx *ctx ) if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; - *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; ctx->header = (struct tgsi_header *) ctx->tokens_cur++; *ctx->header = tgsi_build_header(); @@ -232,7 +228,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP" + "LOOP", + "PRED" 
}; static boolean @@ -485,16 +482,6 @@ parse_register_dcl( return TRUE; } -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "_1X", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; static boolean parse_dst_operand( @@ -511,26 +498,13 @@ parse_dst_operand( cur = ctx->cur; eat_opt_white( &cur ); - if (*cur == '_') { - uint i; - - for (i = 0; i < TGSI_MODULATE_COUNT; i++) { - if (str_match_no_case( &cur, modulate_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - dst->DstRegisterExtModulate.Modulate = i; - ctx->cur = cur; - break; - } - } - } - } if (!parse_opt_writemask( ctx, &writemask )) return FALSE; - dst->DstRegister.File = file; - dst->DstRegister.Index = index; - dst->DstRegister.WriteMask = writemask; + dst->Register.File = file; + dst->Register.Index = index; + dst->Register.WriteMask = writemask; return TRUE; } @@ -538,13 +512,11 @@ static boolean parse_optional_swizzle( struct translate_ctx *ctx, uint swizzle[4], - boolean *parsed_swizzle, - boolean *parsed_extswizzle ) + boolean *parsed_swizzle ) { const char *cur = ctx->cur; *parsed_swizzle = FALSE; - *parsed_extswizzle = FALSE; eat_opt_white( &cur ); if (*cur == '.') { @@ -562,15 +534,8 @@ parse_optional_swizzle( else if (uprcase( *cur ) == 'W') swizzle[i] = TGSI_SWIZZLE_W; else { - if (*cur == '0') - swizzle[i] = TGSI_EXTSWIZZLE_ZERO; - else if (*cur == '1') - swizzle[i] = TGSI_EXTSWIZZLE_ONE; - else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); - return FALSE; - } - *parsed_extswizzle = TRUE; + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; } cur++; } @@ -585,171 +550,52 @@ parse_src_operand( struct translate_ctx *ctx, struct tgsi_full_src_register *src ) { - const char *cur; - float value; uint file; int index; uint ind_file; int ind_index; uint ind_comp; uint swizzle[4]; - boolean parsed_ext_negate_paren = FALSE; boolean parsed_swizzle; - boolean parsed_extswizzle; 
- - if (*ctx->cur == '-') { - cur = ctx->cur; - cur++; - eat_opt_white( &cur ); - if (*cur == '(') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - parsed_ext_negate_paren = TRUE; - } - else if (*cur == '|') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - src->SrcRegisterExtMod.Absolute = 1; - eat_opt_white(&cur); - ctx->cur = cur; - } - } - else if (*ctx->cur == '|') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Absolute = 1; - } if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegister.Negate = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 2.0f) { - eat_opt_white( &cur ); - if (*cur != '*') { - report_error( ctx, "Expected `*'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Scale2X = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } + src->Register.Negate = 1; } - - if (*ctx->cur == '(') { + + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Bias = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 1.0f) { - eat_opt_white( &cur ); - if (*cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Complement = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } + src->Register.Absolute = 1; } if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) return FALSE; - src->SrcRegister.File = file; - src->SrcRegister.Index = index; + src->Register.File = file; + src->Register.Index = index; if (ind_file != TGSI_FILE_NULL) { - src->SrcRegister.Indirect = 1; - src->SrcRegisterInd.File = ind_file; - src->SrcRegisterInd.Index = ind_index; - src->SrcRegisterInd.SwizzleX = ind_comp; - src->SrcRegisterInd.SwizzleY = ind_comp; - 
src->SrcRegisterInd.SwizzleZ = ind_comp; - src->SrcRegisterInd.SwizzleW = ind_comp; + src->Register.Indirect = 1; + src->Indirect.File = ind_file; + src->Indirect.Index = ind_index; + src->Indirect.SwizzleX = ind_comp; + src->Indirect.SwizzleY = ind_comp; + src->Indirect.SwizzleZ = ind_comp; + src->Indirect.SwizzleW = ind_comp; } /* Parse optional swizzle. */ - if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { - if (parsed_extswizzle) { - assert( parsed_swizzle ); - - src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; - src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; - src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; - src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; - } - else if (parsed_swizzle) { - src->SrcRegister.SwizzleX = swizzle[0]; - src->SrcRegister.SwizzleY = swizzle[1]; - src->SrcRegister.SwizzleZ = swizzle[2]; - src->SrcRegister.SwizzleW = swizzle[3]; - } - } - - if (src->SrcRegisterExtMod.Complement) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { + if (parsed_swizzle) { + src->Register.SwizzleX = swizzle[0]; + src->Register.SwizzleY = swizzle[1]; + src->Register.SwizzleZ = swizzle[2]; + src->Register.SwizzleW = swizzle[3]; } - ctx->cur++; } - if (src->SrcRegisterExtMod.Bias) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_float( &ctx->cur, &value )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - if (value != 0.5f) { - report_error( ctx, "Expected 0.5" ); - return FALSE; - } - } - - if (src->SrcRegisterExtMod.Scale2X) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected 
`)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Absolute) { + if (src->Register.Absolute) { eat_opt_white( &ctx->cur ); if (*ctx->cur != '|') { report_error( ctx, "Expected `|'" ); @@ -758,14 +604,6 @@ parse_src_operand( ctx->cur++; } - if (parsed_ext_negate_paren) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } return TRUE; } @@ -857,11 +695,11 @@ parse_instruction( } if (i < info->num_dst) { - if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + if (!parse_dst_operand( ctx, &inst.Dst[i] )) return FALSE; } else if (i < info->num_dst + info->num_src) { - if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + if (!parse_src_operand( ctx, &inst.Src[i - info->num_dst] )) return FALSE; } else { @@ -870,7 +708,8 @@ parse_instruction( for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { if (str_match_no_case( &ctx->cur, texture_names[j] )) { if (!is_digit_alpha_underscore( ctx->cur )) { - inst.InstructionExtTexture.Texture = j; + inst.Instruction.Texture = 1; + inst.Texture.Texture = j; break; } } @@ -896,7 +735,8 @@ parse_instruction( report_error( ctx, "Expected a label" ); return FALSE; } - inst.InstructionExtLabel.Label = target; + inst.Instruction.Label = 1; + inst.Label.Label = target; } advance = tgsi_build_full_instruction( @@ -955,8 +795,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; cur = ctx->cur; eat_opt_white( &cur ); @@ -987,13 +827,13 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } cur2++; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Index = index; cur = cur2; } decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = i; + 
decl.Semantic.Name = i; ctx->cur = cur; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index bc9c18fd4a..8b8f489b35 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -130,15 +130,13 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, /** ** Setup output shader **/ - *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); - - ctx->header = (struct tgsi_header *) (tokens_out + 1); + ctx->header = (struct tgsi_header *)tokens_out; *ctx->header = tgsi_build_header(); - processor = (struct tgsi_processor *) (tokens_out + 2); + processor = (struct tgsi_processor *) (tokens_out + 1); *processor = tgsi_build_processor( procType, ctx->header ); - ctx->ti = 3; + ctx->ti = 2; /** @@ -215,7 +213,7 @@ tgsi_transform_foo( struct tgsi_token *tokens_out, uint max_tokens_out ) { const char *text = - "FRAG1.1\n" + "FRAG\n" "DCL IN[0], COLOR, CONSTANT\n" "DCL OUT[0], COLOR\n" " 0: MOV OUT[0], IN[0]\n" diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 654426a903..3f943845f5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -29,6 +29,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" @@ -36,7 +37,6 @@ #include "util/u_math.h" union tgsi_any_token { - struct tgsi_version version; struct tgsi_header header; struct tgsi_processor processor; struct tgsi_token token; @@ -46,18 +46,12 @@ union tgsi_any_token { struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; - struct tgsi_instruction_ext_nv insn_ext_nv; - struct tgsi_instruction_ext_label insn_ext_label; - struct tgsi_instruction_ext_texture insn_ext_texture; - struct tgsi_instruction_ext_predicate 
insn_ext_predicate; + struct tgsi_instruction_predicate insn_predicate; + struct tgsi_instruction_label insn_label; + struct tgsi_instruction_texture insn_texture; struct tgsi_src_register src; - struct tgsi_src_register_ext_swz src_ext_swz; - struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; struct tgsi_dst_register dst; - struct tgsi_dst_register_ext_concode dst_ext_code; - struct tgsi_dst_register_ext_modulate dst_ext_mod; - struct tgsi_dst_register_ext_predicate dst_ext_pred; unsigned value; }; @@ -75,6 +69,8 @@ struct ureg_tokens { #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 +#define UREG_MAX_LOOP 1 +#define UREG_MAX_PRED 1 #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -118,6 +114,8 @@ struct ureg_program unsigned nr_constant_ranges; unsigned nr_addrs; + unsigned nr_preds; + unsigned nr_loops; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -141,8 +139,9 @@ static void tokens_expand( struct ureg_tokens *tokens, { unsigned old_size = tokens->size * sizeof(unsigned); - if (tokens->tokens == error_tokens) - goto fail; + if (tokens->tokens == error_tokens) { + return; + } while (tokens->count + count > tokens->size) { tokens->size = (1 << ++tokens->order); @@ -151,13 +150,9 @@ static void tokens_expand( struct ureg_tokens *tokens, tokens->tokens = REALLOC(tokens->tokens, old_size, tokens->size * sizeof(unsigned)); - if (tokens->tokens == NULL) - goto fail; - - return; - -fail: - tokens_error(tokens); + if (tokens->tokens == NULL) { + tokens_error(tokens); + } } static void set_bad( struct ureg_program *ureg ) @@ -207,9 +202,13 @@ ureg_dst_register( unsigned file, dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = index; - dst.Pad1 = 0; - dst.Pad2 = 0; return dst; } @@ -350,6 
+349,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, i = ureg->nr_constant_ranges++; ureg->constant_range[i].first = index; ureg->constant_range[i].last = index; + goto out; } /* Collapse all ranges down to one: @@ -417,6 +417,32 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } +/* Allocate a new loop register. + */ +struct ureg_dst +ureg_DECL_loop(struct ureg_program *ureg) +{ + if (ureg->nr_loops < UREG_MAX_LOOP) { + return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_LOOP, 0); +} + +/* Allocate a new predicate register. + */ +struct ureg_dst +ureg_DECL_predicate(struct ureg_program *ureg) +{ + if (ureg->nr_preds < UREG_MAX_PRED) { + return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_PREDICATE, 0); +} + /* Allocate a new sampler. */ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, @@ -528,9 +554,7 @@ void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = (1 + - (src.Absolute ? 1 : 0) + - (src.Indirect ? 1 : 0)); + unsigned size = 1 + (src.Indirect ? 
1 : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -547,17 +571,8 @@ ureg_emit_src( struct ureg_program *ureg, out[n].src.SwizzleW = src.SwizzleW; out[n].src.Index = src.Index; out[n].src.Negate = src.Negate; + out[0].src.Absolute = src.Absolute; n++; - - if (src.Absolute) { - out[0].src.Extended = 1; - out[0].src.Negate = 0; - out[n].value = 0; - out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - out[n].src_ext_mod.Absolute = 1; - out[n].src_ext_mod.Negate = src.Negate; - n++; - } if (src.Indirect) { out[0].src.Indirect = 1; @@ -628,37 +643,54 @@ static void validate( unsigned opcode, #endif } -unsigned +struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, + boolean predicate, + boolean pred_negate, + unsigned pred_swizzle_x, + unsigned pred_swizzle_y, + unsigned pred_swizzle_z, + unsigned pred_swizzle_w, unsigned num_dst, unsigned num_src ) { union tgsi_any_token *out; + uint count = predicate ? 
2 : 1; + struct ureg_emit_insn_result result; validate( opcode, num_dst, num_src ); - out = get_tokens( ureg, DOMAIN_INSN, 1 ); - out[0].value = 0; - out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - out[0].insn.NrTokens = 0; + out = get_tokens( ureg, DOMAIN_INSN, count ); + out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; - out[0].insn.Padding = 0; - out[0].insn.Extended = 0; - + + result.insn_token = ureg->domain[DOMAIN_INSN].count - count; + result.extended_token = result.insn_token; + + if (predicate) { + out[0].insn.Predicate = 1; + out[1].insn_predicate = tgsi_default_instruction_predicate(); + out[1].insn_predicate.Negate = pred_negate; + out[1].insn_predicate.SwizzleX = pred_swizzle_x; + out[1].insn_predicate.SwizzleY = pred_swizzle_y; + out[1].insn_predicate.SwizzleZ = pred_swizzle_z; + out[1].insn_predicate.SwizzleW = pred_swizzle_w; + } + ureg->nr_instructions++; - - return ureg->domain[DOMAIN_INSN].count - 1; + + return result; } void ureg_emit_label(struct ureg_program *ureg, - unsigned insn_token, + unsigned extended_token, unsigned *label_token ) { union tgsi_any_token *out, *insn; @@ -667,13 +699,11 @@ ureg_emit_label(struct ureg_program *ureg, return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); - insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); + out[0].value = 0; - insn->insn.Extended = 1; + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + insn->insn.Label = 1; - out[0].value = 0; - out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - *label_token = ureg->domain[DOMAIN_INSN].count - 1; } @@ -696,26 +726,24 @@ ureg_fixup_label(struct ureg_program *ureg, { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); - assert(out->insn_ext_label.Type == TGSI_INSTRUCTION_EXT_TYPE_LABEL); - out->insn_ext_label.Label = instruction_number; + out->insn_label.Label = instruction_number; } 
void ureg_emit_texture(struct ureg_program *ureg, - unsigned insn_token, + unsigned extended_token, unsigned target ) { union tgsi_any_token *out, *insn; out = get_tokens( ureg, DOMAIN_INSN, 1 ); - insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); - insn->insn.Extended = 1; + insn->insn.Texture = 1; out[0].value = 0; - out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - out[0].insn_ext_texture.Texture = target; + out[0].insn_texture.Texture = target; } @@ -738,12 +766,34 @@ ureg_insn(struct ureg_program *ureg, const struct ureg_src *src, unsigned nr_src ) { - unsigned insn, i; + struct ureg_emit_insn_result insn; + unsigned i; boolean saturate; + boolean predicate; + boolean negate; + unsigned swizzle[4]; saturate = nr_dst ? dst[0].Saturate : FALSE; + predicate = nr_dst ? dst[0].Predicate : FALSE; + if (predicate) { + negate = dst[0].PredNegate; + swizzle[0] = dst[0].PredSwizzleX; + swizzle[1] = dst[0].PredSwizzleY; + swizzle[2] = dst[0].PredSwizzleZ; + swizzle[3] = dst[0].PredSwizzleW; + } - insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); + insn = ureg_emit_insn(ureg, + opcode, + saturate, + predicate, + negate, + swizzle[0], + swizzle[1], + swizzle[2], + swizzle[3], + nr_dst, + nr_src); for (i = 0; i < nr_dst; i++) ureg_emit_dst( ureg, dst[i] ); @@ -751,7 +801,7 @@ ureg_insn(struct ureg_program *ureg, for (i = 0; i < nr_src; i++) ureg_emit_src( ureg, src[i] ); - ureg_fixup_insn_size( ureg, insn ); + ureg_fixup_insn_size( ureg, insn.insn_token ); } void @@ -763,14 +813,36 @@ ureg_tex_insn(struct ureg_program *ureg, const struct ureg_src *src, unsigned nr_src ) { - unsigned insn, i; + struct ureg_emit_insn_result insn; + unsigned i; boolean saturate; + boolean predicate; + boolean negate; + unsigned swizzle[4]; saturate = nr_dst ? dst[0].Saturate : FALSE; + predicate = nr_dst ? 
dst[0].Predicate : FALSE; + if (predicate) { + negate = dst[0].PredNegate; + swizzle[0] = dst[0].PredSwizzleX; + swizzle[1] = dst[0].PredSwizzleY; + swizzle[2] = dst[0].PredSwizzleZ; + swizzle[3] = dst[0].PredSwizzleW; + } - insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); + insn = ureg_emit_insn(ureg, + opcode, + saturate, + predicate, + negate, + swizzle[0], + swizzle[1], + swizzle[2], + swizzle[3], + nr_dst, + nr_src); - ureg_emit_texture( ureg, insn, target ); \ + ureg_emit_texture( ureg, insn.extended_token, target ); for (i = 0; i < nr_dst; i++) ureg_emit_dst( ureg, dst[i] ); @@ -778,7 +850,7 @@ ureg_tex_insn(struct ureg_program *ureg, for (i = 0; i < nr_src; i++) ureg_emit_src( ureg, src[i] ); - ureg_fixup_insn_size( ureg, insn ); + ureg_fixup_insn_size( ureg, insn.insn_token ); } @@ -789,16 +861,27 @@ ureg_label_insn(struct ureg_program *ureg, unsigned nr_src, unsigned *label_token ) { - unsigned insn, i; + struct ureg_emit_insn_result insn; + unsigned i; - insn = ureg_emit_insn( ureg, opcode, FALSE, 0, nr_src ); + insn = ureg_emit_insn(ureg, + opcode, + FALSE, + FALSE, + FALSE, + TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W, + 0, + nr_src); - ureg_emit_label( ureg, insn, label_token ); \ + ureg_emit_label( ureg, insn.extended_token, label_token ); for (i = 0; i < nr_src; i++) ureg_emit_src( ureg, src[i] ); - ureg_fixup_insn_size( ureg, insn ); + ureg_fixup_insn_size( ureg, insn.insn_token ); } @@ -825,8 +908,8 @@ static void emit_decl( struct ureg_program *ureg, out[1].decl_range.Last = index; out[2].value = 0; - out[2].decl_semantic.SemanticName = semantic_name; - out[2].decl_semantic.SemanticIndex = semantic_index; + out[2].decl_semantic.Name = semantic_name; + out[2].decl_semantic.Index = semantic_index; } @@ -861,7 +944,6 @@ static void emit_immediate( struct ureg_program *ureg, out[0].imm.NrTokens = 5; out[0].imm.DataType = TGSI_IMM_FLOAT32; out[0].imm.Padding = 0; - out[0].imm.Extended = 0; 
out[1].imm_data.Float = v[0]; out[2].imm_data.Float = v[1]; @@ -930,6 +1012,20 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } + if (ureg->nr_loops) { + emit_decl_range(ureg, + TGSI_FILE_LOOP, + 0, + ureg->nr_loops); + } + + if (ureg->nr_preds) { + emit_decl_range(ureg, + TGSI_FILE_PREDICATE, + 0, + ureg->nr_preds); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].v ); @@ -955,26 +1051,22 @@ static void copy_instructions( struct ureg_program *ureg ) static void fixup_header_size(struct ureg_program *ureg) { - union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); + union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); - out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 3; + out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; } static void emit_header( struct ureg_program *ureg ) { - union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); - - out[0].version.MajorVersion = 1; - out[0].version.MinorVersion = 1; - out[0].version.Padding = 0; + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); - out[1].header.HeaderSize = 2; - out[1].header.BodySize = 0; + out[0].header.HeaderSize = 2; + out[0].header.BodySize = 0; - out[2].processor.Processor = ureg->processor; - out[2].processor.Padding = 0; + out[1].processor.Processor = ureg->processor; + out[1].processor.Padding = 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index f04f443b9e..94cc70a208 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -67,9 +67,13 @@ struct ureg_dst unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ unsigned Indirect : 1; /* BOOL */ unsigned Saturate : 1; /* BOOL */ + unsigned Predicate : 1; + unsigned PredNegate : 1; /* BOOL */ + unsigned PredSwizzleX: 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleY: 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleZ: 2; /* TGSI_SWIZZLE_ 
*/ + unsigned PredSwizzleW: 2; /* TGSI_SWIZZLE_ */ int Index : 16; /* SINT */ - unsigned Pad1 : 5; - unsigned Pad2 : 1; /* BOOL */ int IndirectIndex : 16; /* SINT */ int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; @@ -153,6 +157,12 @@ ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst ureg_DECL_address( struct ureg_program * ); +struct ureg_dst +ureg_DECL_loop( struct ureg_program * ); + +struct ureg_dst +ureg_DECL_predicate(struct ureg_program *); + /* Supply an index to the sampler declaration as this is the hook to * the external pipe_sampler state. Users of this function probably * don't want just any sampler, but a specific one which they've set @@ -266,10 +276,21 @@ ureg_label_insn(struct ureg_program *ureg, * Internal instruction helpers, don't call these directly: */ -unsigned +struct ureg_emit_insn_result { + unsigned insn_token; /*< Used to fixup insn size. */ + unsigned extended_token; /*< Used to set the Extended bit, usually the same as insn_token. */ +}; + +struct ureg_emit_insn_result ureg_emit_insn(struct ureg_program *ureg, unsigned opcode, boolean saturate, + boolean predicate, + boolean pred_negate, + unsigned pred_swizzle_x, + unsigned pred_swizzle_y, + unsigned pred_swizzle_z, + unsigned pred_swizzle_w, unsigned num_dst, unsigned num_src ); @@ -300,7 +321,17 @@ ureg_fixup_insn_size(struct ureg_program *ureg, static INLINE void ureg_##op( struct ureg_program *ureg ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ + 0, \ + 0).insn_token; \ ureg_fixup_insn_size( ureg, insn ); \ } @@ -309,7 +340,17 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \ + unsigned insn = 
ureg_emit_insn(ureg, \ + opcode, \ + FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ + 0, \ + 1).insn_token; \ ureg_emit_src( ureg, src ); \ ureg_fixup_insn_size( ureg, insn ); \ } @@ -319,9 +360,20 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ unsigned *label_token ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \ - ureg_emit_label( ureg, insn, label_token ); \ - ureg_fixup_insn_size( ureg, insn ); \ + struct ureg_emit_insn_result insn; \ + insn = ureg_emit_insn(ureg, \ + opcode, \ + FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ + 0, \ + 0); \ + ureg_emit_label( ureg, insn.extended_token, label_token ); \ + ureg_fixup_insn_size( ureg, insn.insn_token ); \ } #define OP01_LBL( op ) \ @@ -330,10 +382,21 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ unsigned *label_token ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \ - ureg_emit_label( ureg, insn, label_token ); \ + struct ureg_emit_insn_result insn; \ + insn = ureg_emit_insn(ureg, \ + opcode, \ + FALSE, \ + FALSE, \ + FALSE, \ + TGSI_SWIZZLE_X, \ + TGSI_SWIZZLE_Y, \ + TGSI_SWIZZLE_Z, \ + TGSI_SWIZZLE_W, \ + 0, \ + 1); \ + ureg_emit_label( ureg, insn.extended_token, label_token ); \ ureg_emit_src( ureg, src ); \ - ureg_fixup_insn_size( ureg, insn ); \ + ureg_fixup_insn_size( ureg, insn.insn_token ); \ } #define OP10( op ) \ @@ -341,7 +404,17 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_dst dst ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 0 ); \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ 
+ 0).insn_token; \ ureg_emit_dst( ureg, dst ); \ ureg_fixup_insn_size( ureg, insn ); \ } @@ -353,7 +426,17 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 1 ); \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 1).insn_token; \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src ); \ ureg_fixup_insn_size( ureg, insn ); \ @@ -366,7 +449,17 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src1 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 2).insn_token; \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ @@ -381,12 +474,23 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src1 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); \ - ureg_emit_texture( ureg, insn, target ); \ + struct ureg_emit_insn_result insn; \ + insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 2); \ + ureg_emit_texture( ureg, insn.extended_token, target ); \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ - ureg_fixup_insn_size( ureg, insn ); \ + ureg_fixup_insn_size( ureg, insn.insn_token ); \ } #define OP13( op ) \ @@ -397,7 +501,17 @@ static INLINE void ureg_##op( struct 
ureg_program *ureg, \ struct ureg_src src2 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 3 ); \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 3).insn_token; \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ @@ -415,14 +529,25 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ struct ureg_src src3 ) \ { \ unsigned opcode = TGSI_OPCODE_##op; \ - unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 4 ); \ - ureg_emit_texture( ureg, insn, target ); \ + struct ureg_emit_insn_result insn; \ + insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 4); \ + ureg_emit_texture( ureg, insn.extended_token, target ); \ ureg_emit_dst( ureg, dst ); \ ureg_emit_src( ureg, src0 ); \ ureg_emit_src( ureg, src1 ); \ ureg_emit_src( ureg, src2 ); \ ureg_emit_src( ureg, src3 ); \ - ureg_fixup_insn_size( ureg, insn ); \ + ureg_fixup_insn_size( ureg, insn.insn_token ); \ } @@ -497,6 +622,24 @@ ureg_saturate( struct ureg_dst reg ) return reg; } +static INLINE struct ureg_dst +ureg_predicate(struct ureg_dst reg, + boolean negate, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Predicate = 1; + reg.PredNegate = negate; + reg.PredSwizzleX = swizzle_x; + reg.PredSwizzleY = swizzle_y; + reg.PredSwizzleZ = swizzle_z; + reg.PredSwizzleW = swizzle_w; + return reg; +} + static INLINE struct ureg_dst ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) { @@ -530,9 +673,13 @@ ureg_dst( struct ureg_src src ) dst.IndirectIndex = src.IndirectIndex; dst.IndirectSwizzle = 
src.IndirectSwizzle; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = src.Index; - dst.Pad1 = 0; - dst.Pad2 = 0; return dst; } @@ -571,9 +718,13 @@ ureg_dst_undef( void ) dst.IndirectIndex = 0; dst.IndirectSwizzle = 0; dst.Saturate = 0; + dst.Predicate = 0; + dst.PredNegate = 0; + dst.PredSwizzleX = TGSI_SWIZZLE_X; + dst.PredSwizzleY = TGSI_SWIZZLE_Y; + dst.PredSwizzleZ = TGSI_SWIZZLE_Z; + dst.PredSwizzleW = TGSI_SWIZZLE_W; dst.Index = 0; - dst.Pad1 = 0; - dst.Pad2 = 0; return dst; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 71f8a6ca40..f4ca9e21ed 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -69,59 +69,15 @@ tgsi_util_get_src_register_swizzle( return 0; } -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - assert( 0 ); - } - return 0; -} unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ) { - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. 
- */ - swizzle = tgsi_util_get_src_register_extswizzle( - &reg->SrcRegisterExtSwz, + return tgsi_util_get_src_register_swizzle( + &reg->Register, component ); - - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - &reg->SrcRegister, - swizzle ); - } - - return swizzle; } void @@ -148,74 +104,6 @@ tgsi_util_set_src_register_swizzle( } } -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->ExtSwizzleX = swizzle; - break; - case 1: - reg->ExtSwizzleY = swizzle; - break; - case 2: - reg->ExtSwizzleZ = swizzle; - break; - case 3: - reg->ExtSwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - assert( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - assert( 0 ); - } -} - unsigned tgsi_util_get_full_src_register_sign_mode( const struct tgsi_full_src_register *reg, @@
-223,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->Register.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -234,19 +122,7 @@ tgsi_util_get_full_src_register_sign_mode( } } else { - /* Accumulate the three negations. */ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_TOGGLE; } else { @@ -262,35 +138,26 @@ tgsi_util_set_full_src_register_sign_mode( struct tgsi_full_src_register *reg, unsigned sign_mode ) { - reg->SrcRegisterExtSwz.NegateX = 0; - reg->SrcRegisterExtSwz.NegateY = 0; - reg->SrcRegisterExtSwz.NegateZ = 0; - reg->SrcRegisterExtSwz.NegateW = 0; - switch (sign_mode) { case TGSI_UTIL_SIGN_CLEAR: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 0; + reg->Register.Absolute = 1; break; case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 1; + reg->Register.Absolute = 1; + reg->Register.Negate = 1; break; case TGSI_UTIL_SIGN_TOGGLE: - reg->SrcRegister.Negate = 1; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 1; + reg->Register.Absolute = 0; break; case TGSI_UTIL_SIGN_KEEP: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->Register.Negate = 0; + reg->Register.Absolute = 0; break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 21eb656327..19ee2e7cf2 100644 ---
a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -33,7 +33,6 @@ extern "C" { #endif struct tgsi_src_register; -struct tgsi_src_register_ext_swz; struct tgsi_full_src_register; void * @@ -45,13 +44,9 @@ tgsi_util_get_src_register_swizzle( const struct tgsi_src_register *reg, unsigned component ); -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component); unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ); @@ -61,23 +56,6 @@ tgsi_util_set_src_register_swizzle( unsigned swizzle, unsigned component ); -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ); - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ); - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ); - #define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ #define TGSI_UTIL_SIGN_SET 1 /* Force negative */ #define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 8d39b64c6c..266e7ee81e 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -217,7 +217,7 @@ ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM ) ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) -//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )*/ ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED ) ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED ) diff --git 
a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index ae8d330a78..1d8bb55bbd 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -10,6 +10,7 @@ C_SOURCES = \ u_debug_stack.c \ u_blit.c \ u_cache.c \ + u_cpu_detect.c \ u_draw_quad.c \ u_format.c \ u_format_access.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 28a5ab4256..8d99106d0b 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -24,10 +24,10 @@ util = env.ConvenienceLibrary( 'u_bitmask.c', 'u_blit.c', 'u_cache.c', + 'u_cpu_detect.c', 'u_debug.c', 'u_debug_dump.c', 'u_debug_memory.c', - 'u_debug_profile.c', 'u_debug_stack.c', 'u_debug_symbol.c', 'u_draw_quad.c', diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index c516317d70..abe1de3302 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -46,6 +46,7 @@ #include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "util/u_surface.h" +#include "util/u_rect.h" #include "cso_cache/cso_context.h" @@ -182,47 +183,7 @@ get_next_slot( struct blit_state *ctx ) } -/** - * Setup vertex data for the textured quad we'll draw. 
- * Note: y=0=top - */ -static unsigned -setup_vertex_data(struct blit_state *ctx, - float x0, float y0, float x1, float y1, float z) -{ - unsigned offset; - - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = 0.0f; /*s*/ - ctx->vertices[0][1][1] = 0.0f; /*t*/ - - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y0; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = 1.0f; /*s*/ - ctx->vertices[1][1][1] = 0.0f; /*t*/ - - ctx->vertices[2][0][0] = x1; - ctx->vertices[2][0][1] = y1; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = 1.0f; - ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[3][0][0] = x0; - ctx->vertices[3][0][1] = y1; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = 0.0f; - ctx->vertices[3][1][1] = 1.0f; - - offset = get_next_slot( ctx ); - - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); - - return offset; -} /** @@ -315,15 +276,13 @@ util_blit_pixels_writemask(struct blit_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp, *tex; - struct pipe_surface *texSurf; + struct pipe_texture *tex = NULL; struct pipe_framebuffer_state fb; const int srcW = abs(srcX1 - srcX0); const int srcH = abs(srcY1 - srcY0); - const int srcLeft = MIN2(srcX0, srcX1); - const int srcTop = MIN2(srcY0, srcY1); unsigned offset; boolean overlap; + float s0, t0, s1, t1; assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); @@ -343,7 +302,8 @@ util_blit_pixels_writemask(struct blit_state *ctx, * no overlapping. * Filter mode should not matter since there's no stretching. 
*/ - if (dst->format == src->format && + if (pipe->surface_copy && + dst->format == src->format && srcX0 < srcX1 && dstX0 < dstX1 && srcY0 < srcY1 && @@ -358,54 +318,82 @@ util_blit_pixels_writemask(struct blit_state *ctx, return; } - if (srcLeft != srcX0) { - /* left-right flip */ - int tmp = dstX0; - dstX0 = dstX1; - dstX1 = tmp; - } - - if (srcTop != srcY0) { - /* up-down flip */ - int tmp = dstY0; - dstY0 = dstY1; - dstY1 = tmp; - } - assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); - /* - * XXX for now we're always creating a temporary texture. - * Strictly speaking that's not always needed. + /* Create a temporary texture when src and dest alias or when src + * is anything other than a single-level 2d texture. + * + * This can still be improved upon. */ + if (util_same_surface(src, dst) || + src->texture->target != PIPE_TEXTURE_2D || + src->texture->last_level != 0) + { + struct pipe_texture texTemp; + struct pipe_surface *texSurf; + const int srcLeft = MIN2(srcX0, srcX1); + const int srcTop = MIN2(srcY0, srcY1); + + if (srcLeft != srcX0) { + /* left-right flip */ + int tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcTop != srcY0) { + /* up-down flip */ + int tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + /* create temp texture */ + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = src->format; + texTemp.last_level = 0; + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; + + tex = screen->texture_create(screen, &texTemp); + if (!tex) + return; + + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* load temp texture */ + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + } else { + util_surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, 
srcH); /* size */ + } + + /* free the surface, update the texture if necessary. + */ + pipe_surface_reference(&texSurf, NULL); + s0 = 0.0f; + s1 = 1.0f; + t0 = 0.0f; + t1 = 1.0f; + } + else { + pipe_texture_reference(&tex, src->texture); + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; + } - /* create temp texture */ - memset(&texTemp, 0, sizeof(texTemp)); - texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = src->format; - texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; - pf_get_block(src->format, &texTemp.block); - - tex = screen->texture_create(screen, &texTemp); - if (!tex) - return; - - texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_WRITE); - - /* load temp texture */ - pipe->surface_copy(pipe, - texSurf, 0, 0, /* dest */ - src, srcLeft, srcTop, /* src */ - srcW, srcH); /* size */ - - /* free the surface, update the texture if necessary. 
- */ - pipe_surface_reference(&texSurf, NULL); /* save state (restored below) */ cso_save_blend(ctx->cso); @@ -447,9 +435,12 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - offset = setup_vertex_data(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, z); + offset = setup_vertex_data_tex(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, + s0, t0, + s1, t1, + z); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, @@ -526,13 +517,13 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h index 7c16b32cf9..2c32db6175 100644 --- a/src/gallium/auxiliary/util/u_clear.h +++ b/src/gallium/auxiliary/util/u_clear.h @@ -32,6 +32,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_pack_color.h" +#include "util/u_rect.h" /** @@ -45,16 +46,25 @@ util_clear(struct pipe_context *pipe, { if (buffers & PIPE_CLEAR_COLOR) { struct pipe_surface *ps = framebuffer->cbufs[0]; - unsigned color; + union util_color uc; - util_pack_color(rgba, ps->format, &color); - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + util_pack_color(rgba, ps->format, &uc); + if (pipe->surface_fill) { + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); + } else { + 
util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); + } } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { struct pipe_surface *ps = framebuffer->zsbuf; - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, - util_pack_z_stencil(ps->format, depth, stencil)); + if (pipe->surface_fill) { + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + util_pack_z_stencil(ps->format, depth, stencil)); + } else { + util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + util_pack_z_stencil(ps->format, depth, stencil)); + } } } diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index d9f2f8fc28..a08241971c 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -24,23 +24,21 @@ * **************************************************************************/ -/* - * Based on the work of Eric Anholt <anholt@FreeBSD.org> +/** + * @file + * CPU feature detection. + * + * @author Dennis Smit + * @author Based on the work of Eric Anholt <anholt@FreeBSD.org> */ -/* FIXME: clean this entire file up */ +#include "pipe/p_config.h" +#include "u_debug.h" #include "u_cpu_detect.h" -#ifdef __linux__ -#define OS_LINUX -#endif -#ifdef WIN32 -#define OS_WIN32 -#endif - -#if defined(ARCH_POWERPC) -#if defined(OS_DARWIN) +#if defined(PIPE_ARCH_PPC) +#if defined(PIPE_OS_DARWIN) #include <sys/sysctl.h> #else #include <signal.h> @@ -48,140 +46,147 @@ #endif #endif -#if defined(OS_NETBSD) || defined(OS_OPENBSD) +#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) #include <sys/param.h> #include <sys/sysctl.h> #include <machine/cpu.h> #endif -#if defined(OS_FREEBSD) +#if defined(PIPE_OS_FREEBSD) #include <sys/types.h> #include <sys/sysctl.h> #endif -#if defined(OS_LINUX) +#if defined(PIPE_OS_LINUX) #include <signal.h> #endif -#if defined(OS_WIN32) -#include <windows.h> +#ifdef PIPE_OS_UNIX +#include <unistd.h> #endif -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> 
-#include <string.h> +#if defined(PIPE_OS_WINDOWS) +#include <windows.h> +#if defined(MSVC) +#include <intrin.h> +#endif +#endif -static struct cpu_detect_caps __cpu_detect_caps; -static int __cpu_detect_initialized = 0; +struct util_cpu_caps util_cpu_caps; static int has_cpuid(void); -static int cpuid(unsigned int ax, unsigned int *p); + +#if defined(PIPE_ARCH_X86) /* The sigill handlers */ -#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */ -#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) -static void sigill_handler_sse(int signal, struct sigcontext sc) +#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/ +static void +sigill_handler_sse(int signal, struct sigcontext sc) { - /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" - * instructions are 3 bytes long. We must increment the instruction - * pointer manually to avoid repeated execution of the offending - * instruction. - * - * If the SIGILL is caused by a divide-by-zero when unmasked - * exceptions aren't supported, the SIMD FPU status and control - * word will be restored at the end of the test, so we don't need - * to worry about doing it here. Besides, we may not be able to... - */ - sc.eip += 3; - - __cpu_detect_caps.hasSSE=0; + /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" + * instructions are 3 bytes long. We must increment the instruction + * pointer manually to avoid repeated execution of the offending + * instruction. + * + * If the SIGILL is caused by a divide-by-zero when unmasked + * exceptions aren't supported, the SIMD FPU status and control + * word will be restored at the end of the test, so we don't need + * to worry about doing it here. Besides, we may not be able to... 
+ */ + sc.eip += 3; + + util_cpu_caps.has_sse=0; } -static void sigfpe_handler_sse(int signal, struct sigcontext sc) +static void +sigfpe_handler_sse(int signal, struct sigcontext sc) { - if (sc.fpstate->magic != 0xffff) { - /* Our signal context has the extended FPU state, so reset the - * divide-by-zero exception mask and clear the divide-by-zero - * exception bit. - */ - sc.fpstate->mxcsr |= 0x00000200; - sc.fpstate->mxcsr &= 0xfffffffb; - } else { - /* If we ever get here, we're completely hosed. - */ - } + if (sc.fpstate->magic != 0xffff) { + /* Our signal context has the extended FPU state, so reset the + * divide-by-zero exception mask and clear the divide-by-zero + * exception bit. + */ + sc.fpstate->mxcsr |= 0x00000200; + sc.fpstate->mxcsr &= 0xfffffffb; + } else { + /* If we ever get here, we're completely hosed. + */ + } } -#endif -#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ +#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ -#if defined(OS_WIN32) -LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep) +#if defined(PIPE_OS_WINDOWS) +static LONG CALLBACK +win32_sig_handler_sse(EXCEPTION_POINTERS* ep) { - if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ - ep->ContextRecord->Eip +=3; - __cpu_detect_caps.hasSSE=0; - return EXCEPTION_CONTINUE_EXECUTION; - } - return EXCEPTION_CONTINUE_SEARCH; + if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ + ep->ContextRecord->Eip +=3; + util_cpu_caps.has_sse=0; + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; } -#endif /* OS_WIN32 */ +#endif /* PIPE_OS_WINDOWS */ +#endif /* PIPE_ARCH_X86 */ -#if defined(ARCH_POWERPC) && !defined(OS_DARWIN) -static sigjmp_buf __lv_powerpc_jmpbuf; -static volatile sig_atomic_t __lv_powerpc_canjump = 0; -static void sigill_handler (int sig); +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +static jmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; 
-static void sigill_handler (int sig) +static void +sigill_handler(int sig) { - if (!__lv_powerpc_canjump) { - signal (sig, SIG_DFL); - raise (sig); - } + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } - __lv_powerpc_canjump = 0; - siglongjmp(__lv_powerpc_jmpbuf, 1); + __lv_powerpc_canjump = 0; + longjmp(__lv_powerpc_jmpbuf, 1); } +#endif -static void check_os_altivec_support(void) +#if defined(PIPE_ARCH_PPC) +static void +check_os_altivec_support(void) { -#if defined(OS_DARWIN) - int sels[2] = {CTL_HW, HW_VECTORUNIT}; - int has_vu = 0; - int len = sizeof (has_vu); - int err; - - err = sysctl(sels, 2, &has_vu, &len, NULL, 0); - - if (err == 0) { - if (has_vu != 0) { - __cpu_detect_caps.hasAltiVec = 1; - } - } -#else /* !OS_DARWIN */ - /* no Darwin, do it the brute-force way */ - /* this is borrowed from the libmpeg2 library */ - signal(SIGILL, sigill_handler); - if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) { - signal(SIGILL, SIG_DFL); - } else { - __lv_powerpc_canjump = 1; - - __asm __volatile - ("mtspr 256, %0\n\t" - "vand %%v0, %%v0, %%v0" - : - : "r" (-1)); - - signal(SIGILL, SIG_DFL); - __cpu_detect_caps.hasAltiVec = 1; - } -#endif +#if defined(PIPE_OS_DARWIN) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + util_cpu_caps.has_altivec = 1; + } + } +#else /* !PIPE_OS_DARWIN */ + /* no Darwin, do it the brute-force way */ + /* this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (setjmp(__lv_powerpc_jmpbuf)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + util_cpu_caps.has_altivec = 1; + } +#endif /* PIPE_OS_DARWIN */ } -#endif +#endif /* PIPE_ARCH_PPC */ /* If we're running on a processor that can do SSE, let's see if we * are 
allowed to or not. This will catch 2.4.0 or later kernels that @@ -189,318 +194,327 @@ static void check_os_altivec_support(void) * and RedHat patched 2.2 kernels that have broken exception handling * support for user space apps that do SSE. */ -static void check_os_katmai_support(void) +#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64) +static void +check_os_katmai_support(void) { -#if defined(ARCH_X86) -#if defined(OS_FREEBSD) - int has_sse=0, ret; - int len = sizeof (has_sse); - - ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); - if (ret || !has_sse) - __cpu_detect_caps.hasSSE=0; - -#elif defined(OS_NETBSD) || defined(OS_OPENBSD) - int has_sse, has_sse2, ret, mib[2]; - int varlen; - - mib[0] = CTL_MACHDEP; - mib[1] = CPU_SSE; - varlen = sizeof (has_sse); - - ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); - if (ret < 0 || !has_sse) { - __cpu_detect_caps.hasSSE = 0; - } else { - __cpu_detect_caps.hasSSE = 1; - } - - mib[1] = CPU_SSE2; - varlen = sizeof (has_sse2); - ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); - if (ret < 0 || !has_sse2) { - __cpu_detect_caps.hasSSE2 = 0; - } else { - __cpu_detect_caps.hasSSE2 = 1; - } - __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */ - -#elif defined(OS_WIN32) - LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; - if (__cpu_detect_caps.hasSSE) { - exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); - __asm __volatile ("xorps %xmm0, %xmm0"); - SetUnhandledExceptionFilter(exc_fil); - } -#elif defined(OS_LINUX) - struct sigaction saved_sigill; - struct sigaction saved_sigfpe; - - /* Save the original signal handlers. - */ - sigaction(SIGILL, NULL, &saved_sigill); - sigaction(SIGFPE, NULL, &saved_sigfpe); - - signal(SIGILL, (void (*)(int))sigill_handler_sse); - signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); - - /* Emulate test for OSFXSR in CR4. The OS will set this bit if it - * supports the extended FPU save and restore required for SSE. 
If - * we execute an SSE instruction on a PIII and get a SIGILL, the OS - * doesn't support Streaming SIMD Exceptions, even if the processor - * does. - */ - if (__cpu_detect_caps.hasSSE) { - __asm __volatile ("xorps %xmm1, %xmm0"); - } - - /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if - * it supports unmasked SIMD FPU exceptions. If we unmask the - * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS - * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE - * as expected, we're okay but we need to clean up after it. - * - * Are we being too stringent in our requirement that the OS support - * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by - * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 - * doesn't even support them. We at least know the user-space SSE - * support is good in kernels that do support unmasked exceptions, - * and therefore to be safe I'm going to leave this test in here. - */ - if (__cpu_detect_caps.hasSSE) { - // test_os_katmai_exception_support(); - } - - /* Restore the original signal handlers. 
- */ - sigaction(SIGILL, &saved_sigill, NULL); - sigaction(SIGFPE, &saved_sigfpe, NULL); +#if defined(PIPE_ARCH_X86) +#if defined(PIPE_OS_FREEBSD) + int has_sse=0, ret; + int len = sizeof (has_sse); + + ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); + if (ret || !has_sse) + util_cpu_caps.has_sse=0; + +#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) + int has_sse, has_sse2, ret, mib[2]; + int varlen; + + mib[0] = CTL_MACHDEP; + mib[1] = CPU_SSE; + varlen = sizeof (has_sse); + + ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); + if (ret < 0 || !has_sse) { + util_cpu_caps.has_sse = 0; + } else { + util_cpu_caps.has_sse = 1; + } + + mib[1] = CPU_SSE2; + varlen = sizeof (has_sse2); + ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); + if (ret < 0 || !has_sse2) { + util_cpu_caps.has_sse2 = 0; + } else { + util_cpu_caps.has_sse2 = 1; + } + util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */ + +#elif defined(PIPE_OS_WINDOWS) + LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; + if (util_cpu_caps.has_sse) { + exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); +#if defined(PIPE_CC_GCC) + __asm __volatile ("xorps %xmm0, %xmm0"); +#elif defined(PIPE_CC_MSVC) + __asm { + xorps xmm0, xmm0 /* executing SSE instruction */ + } +#else +#error Unsupported compiler +#endif + SetUnhandledExceptionFilter(exc_fil); + } +#elif defined(PIPE_OS_LINUX) + struct sigaction saved_sigill; + struct sigaction saved_sigfpe; + + /* Save the original signal handlers. + */ + sigaction(SIGILL, NULL, &saved_sigill); + sigaction(SIGFPE, NULL, &saved_sigfpe); + + signal(SIGILL, (void (*)(int))sigill_handler_sse); + signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); + + /* Emulate test for OSFXSR in CR4. The OS will set this bit if it + * supports the extended FPU save and restore required for SSE. If + * we execute an SSE instruction on a PIII and get a SIGILL, the OS + * doesn't support Streaming SIMD Exceptions, even if the processor + * does. 
+ */ + if (util_cpu_caps.has_sse) { + __asm __volatile ("xorps %xmm1, %xmm0"); + } + + /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if + * it supports unmasked SIMD FPU exceptions. If we unmask the + * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS + * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE + * as expected, we're okay but we need to clean up after it. + * + * Are we being too stringent in our requirement that the OS support + * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by + * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 + * doesn't even support them. We at least know the user-space SSE + * support is good in kernels that do support unmasked exceptions, + * and therefore to be safe I'm going to leave this test in here. + */ + if (util_cpu_caps.has_sse) { + /* test_os_katmai_exception_support(); */ + } + + /* Restore the original signal handlers. + */ + sigaction(SIGILL, &saved_sigill, NULL); + sigaction(SIGFPE, &saved_sigfpe, NULL); #else - /* We can't use POSIX signal handling to test the availability of - * SSE, so we disable it by default. - */ - __cpu_detect_caps.hasSSE = 0; + /* We can't use POSIX signal handling to test the availability of + * SSE, so we disable it by default. 
+ */ + util_cpu_caps.has_sse = 0; #endif /* __linux__ */ #endif + +#if defined(PIPE_ARCH_X86_64) + util_cpu_caps.has_sse = 1; +#endif } static int has_cpuid(void) { -#if defined(ARCH_X86) - int a, c; - - __asm __volatile - ("pushf\n" - "popl %0\n" - "movl %0, %1\n" - "xorl $0x200000, %0\n" - "push %0\n" - "popf\n" - "pushf\n" - "popl %0\n" - : "=a" (a), "=c" (c) - : - : "cc"); - - return a != c; +#if defined(PIPE_ARCH_X86) +#if defined(PIPE_OS_GCC) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + /* FIXME */ + return 1; +#endif +#elif defined(PIPE_ARCH_X86_64) + return 1; #else - return 0; + return 0; #endif } -static int cpuid(unsigned int ax, unsigned int *p) + +/** + * @sa cpuid.h included in gcc-4.3 onwards. + * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + */ +static INLINE void +cpuid(uint32_t ax, uint32_t *p) { -#if defined(ARCH_X86) - unsigned int flags; - - __asm __volatile - ("movl %%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl %%ebx, %%esi" - : "=a" (p[0]), "=S" (p[1]), - "=c" (p[2]), "=d" (p[3]) - : "0" (ax)); - - return 0; +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + __asm __volatile ( + "xchgl %%ebx, %1\n\t" + "cpuid\n\t" + "xchgl %%ebx, %1" + : "=a" (p[0]), + "=S" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax) + ); +#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) + __asm __volatile ( + "cpuid\n\t" + : "=a" (p[0]), + "=b" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax) + ); +#elif defined(PIPE_CC_MSVC) + __cpuid(p, ax); #else - return -1; + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = 0; #endif } +#endif /* X86 or X86_64 */ -void cpu_detect_initialize() +void +util_cpu_detect(void) { - unsigned int regs[4]; - unsigned int regs2[4]; - - int mib[2], ncpu; - int len; - - memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps)); - - /* Check for arch 
type */ -#if defined(ARCH_MIPS) - __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS; -#elif defined(ARCH_ALPHA) - __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA; -#elif defined(ARCH_SPARC) - __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC; -#elif defined(ARCH_X86) - __cpu_detect_caps.type = CPU_DETECT_TYPE_X86; -#elif defined(ARCH_POWERPC) - __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC; + static boolean util_cpu_detect_initialized = FALSE; + + if(util_cpu_detect_initialized) + return; + + memset(&util_cpu_caps, 0, sizeof util_cpu_caps); + + /* Check for arch type */ +#if defined(PIPE_ARCH_MIPS) + util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS; +#elif defined(PIPE_ARCH_ALPHA) + util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA; +#elif defined(PIPE_ARCH_SPARC) + util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC; +#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + util_cpu_caps.arch = UTIL_CPU_ARCH_X86; + util_cpu_caps.little_endian = 1; +#elif defined(PIPE_ARCH_PPC) + util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC; + util_cpu_caps.little_endian = 0; #else - __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER; + util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN; #endif - /* Count the number of CPUs in system */ -#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN) - __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN); - if (__cpu_detect_caps.nrcpu == -1) - __cpu_detect_caps.nrcpu = 1; - -#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD) - - mib[0] = CTL_HW; - mib[1] = HW_NCPU; - - len = sizeof (ncpu); - sysctl(mib, 2, &ncpu, &len, NULL, 0); - __cpu_detect_caps.nrcpu = ncpu; - + /* Count the number of CPUs in system */ +#if defined(PIPE_OS_WINDOWS) + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors; + } +#elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN) + util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (util_cpu_caps.nr_cpus == -1) + util_cpu_caps.nr_cpus = 
1; +#elif defined(PIPE_OS_BSD) + { + int mib[2], ncpu; + int len; + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + util_cpu_caps.nr_cpus = ncpu; + } #else - __cpu_detect_caps.nrcpu = 1; + util_cpu_caps.nr_cpus = 1; #endif -#if defined(ARCH_X86) - /* No cpuid, old 486 or lower */ - if (has_cpuid() == 0) - return; - - __cpu_detect_caps.cacheline = 32; - - /* Get max cpuid level */ - cpuid(0x00000000, regs); - - if (regs[0] >= 0x00000001) { - unsigned int cacheline; - - cpuid (0x00000001, regs2); - - __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf; - if (__cpu_detect_caps.x86cpuType == 0xf) - __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ - - /* general feature flags */ - __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ - __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ - __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ - __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ - __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */ - __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ - __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */ - - cacheline = ((regs2[1] >> 8) & 0xFF) * 8; - if (cacheline > 0) - __cpu_detect_caps.cacheline = cacheline; - } - - cpuid(0x80000000, regs); - - if (regs[0] >= 0x80000001) { - - cpuid(0x80000001, regs2); - - __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ - __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ - __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ - __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30; - } - - if (regs[0] >= 0x80000006) { - cpuid(0x80000006, regs2); - __cpu_detect_caps.cacheline = regs2[2] & 0xFF; - } - - -#if defined(OS_LINUX) || defined(OS_FREEBSD) 
|| defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD) - if (__cpu_detect_caps.hasSSE) - check_os_katmai_support(); - - if (!__cpu_detect_caps.hasSSE) { - __cpu_detect_caps.hasSSE2 = 0; - __cpu_detect_caps.hasSSE3 = 0; - __cpu_detect_caps.hasSSSE3 = 0; - } -#else - __cpu_detect_caps.hasSSE = 0; - __cpu_detect_caps.hasSSE2 = 0; - __cpu_detect_caps.hasSSE3 = 0; - __cpu_detect_caps.hasSSSE3 = 0; +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if (has_cpuid()) { + uint32_t regs[4]; + uint32_t regs2[4]; + + util_cpu_caps.cacheline = 32; + + /* Get max cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf; + if (util_cpu_caps.x86_cpu_type == 0xf) + util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + util_cpu_caps.has_tsc = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ + util_cpu_caps.has_mmx = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + util_cpu_caps.has_sse = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ + util_cpu_caps.has_sse2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ + util_cpu_caps.has_sse3 = (regs2[2] & (1)); /* 0x0000001 */ + util_cpu_caps.has_ssse3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ + util_cpu_caps.has_sse4_1 = (regs2[2] & (1 << 19)) >> 19; + util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + util_cpu_caps.cacheline = cacheline; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + util_cpu_caps.has_mmx |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + util_cpu_caps.has_mmx2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ + util_cpu_caps.has_3dnow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ + util_cpu_caps.has_3dnow_ext = (regs2[3] & (1 << 30 )) >> 30; + } 
+ + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + util_cpu_caps.cacheline = regs2[2] & 0xFF; + } + + if (util_cpu_caps.has_sse) + check_os_katmai_support(); + + if (!util_cpu_caps.has_sse) { + util_cpu_caps.has_sse2 = 0; + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; + util_cpu_caps.has_sse4_1 = 0; + } + } +#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + +#if defined(PIPE_ARCH_PPC) + check_os_altivec_support(); +#endif /* PIPE_ARCH_PPC */ + +#ifdef DEBUG + debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); + debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); + + debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); + debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); + + debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); + debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); + debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); + debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); + debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); + debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); + debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); + debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); + debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); + debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); #endif -#endif /* ARCH_X86 */ - -#if defined(ARCH_POWERPC) - check_os_altivec_support(); -#endif /* ARCH_POWERPC */ - - __cpu_detect_initialized = 1; -} - -struct cpu_detect_caps *cpu_detect_get_caps() -{ - return &__cpu_detect_caps; -} - -/* The getters and setters for feature flags */ -int cpu_detect_get_tsc() -{ - return __cpu_detect_caps.hasTSC; -} - -int cpu_detect_get_mmx() -{ - return 
__cpu_detect_caps.hasMMX; -} - -int cpu_detect_get_mmx2() -{ - return __cpu_detect_caps.hasMMX2; -} -int cpu_detect_get_sse() -{ - return __cpu_detect_caps.hasSSE; -} - -int cpu_detect_get_sse2() -{ - return __cpu_detect_caps.hasSSE2; -} - -int cpu_detect_get_sse3() -{ - return __cpu_detect_caps.hasSSE3; -} - -int cpu_detect_get_ssse3() -{ - return __cpu_detect_caps.hasSSSE3; + util_cpu_detect_initialized = TRUE; } - -int cpu_detect_get_3dnow() -{ - return __cpu_detect_caps.has3DNow; -} - -int cpu_detect_get_3dnow2() -{ - return __cpu_detect_caps.has3DNowExt; -} - -int cpu_detect_get_altivec() -{ - return __cpu_detect_caps.hasAltiVec; -} - diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index 1612d49286..4b3dc39c34 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -24,55 +24,55 @@ * ***************************************************************************/ -/* - * Based on the work of Eric Anholt <anholt@FreeBSD.org> +/** + * @file + * CPU feature detection. 
+ * + * @author Dennis Smit + * @author Based on the work of Eric Anholt <anholt@FreeBSD.org> */ -#ifndef _CPU_DETECT_H -#define _CPU_DETECT_H +#ifndef _UTIL_CPU_DETECT_H +#define _UTIL_CPU_DETECT_H + +#include "pipe/p_compiler.h" -typedef enum { - CPU_DETECT_TYPE_MIPS, - CPU_DETECT_TYPE_ALPHA, - CPU_DETECT_TYPE_SPARC, - CPU_DETECT_TYPE_X86, - CPU_DETECT_TYPE_POWERPC, - CPU_DETECT_TYPE_OTHER -} cpu_detect_type; +enum util_cpu_arch { + UTIL_CPU_ARCH_UNKNOWN = 0, + UTIL_CPU_ARCH_MIPS, + UTIL_CPU_ARCH_ALPHA, + UTIL_CPU_ARCH_SPARC, + UTIL_CPU_ARCH_X86, + UTIL_CPU_ARCH_POWERPC +}; -struct cpu_detect_caps { - cpu_detect_type type; - int nrcpu; +struct util_cpu_caps { + enum util_cpu_arch arch; + unsigned nr_cpus; - /* Feature flags */ - int x86cpuType; - int cacheline; + /* Feature flags */ + int x86_cpu_type; + unsigned cacheline; - int hasTSC; - int hasMMX; - int hasMMX2; - int hasSSE; - int hasSSE2; - int hasSSE3; - int hasSSSE3; - int has3DNow; - int has3DNowExt; - int hasAltiVec; + unsigned little_endian:1; + + unsigned has_tsc:1; + unsigned has_mmx:1; + unsigned has_mmx2:1; + unsigned has_sse:1; + unsigned has_sse2:1; + unsigned has_sse3:1; + unsigned has_ssse3:1; + unsigned has_sse4_1:1; + unsigned has_3dnow:1; + unsigned has_3dnow_ext:1; + unsigned has_altivec:1; }; -/* prototypes */ -void cpu_detect_initialize(void); -struct cpu_detect_caps *cpu_detect_get_caps(void); +extern struct util_cpu_caps +util_cpu_caps; + +void util_cpu_detect(void); -int cpu_detect_get_tsc(void); -int cpu_detect_get_mmx(void); -int cpu_detect_get_mmx2(void); -int cpu_detect_get_sse(void); -int cpu_detect_get_sse2(void); -int cpu_detect_get_sse3(void); -int cpu_detect_get_ssse3(void); -int cpu_detect_get_3dnow(void); -int cpu_detect_get_3dnow2(void); -int cpu_detect_get_altivec(void); -#endif /* _CPU_DETECT_H */ +#endif /* _UTIL_CPU_DETECT_H */ diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 96d400c839..40633574b0 100644 --- 
a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -669,10 +669,10 @@ void debug_dump_surface(const char *prefix, goto error; debug_dump_image(prefix, - transfer->format, - transfer->block.size, - transfer->nblocksx, - transfer->nblocksy, + texture->format, + pf_get_blocksize(texture->format), + pf_get_nblocksx(texture->format, transfer->width), + pf_get_nblocksy(texture->format, transfer->height), transfer->stride, data); diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 1380d98d7e..abd834c741 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -65,6 +65,11 @@ extern "C" { #define __FUNCTION__ "???" #endif +#if defined(__GNUC__) +#define _util_printf_format(fmt, list) __attribute__ ((format (printf, fmt, list))) +#else +#define _util_printf_format(fmt, list) +#endif void _debug_vprintf(const char *format, va_list ap); @@ -82,14 +87,17 @@ _debug_printf(const char *format, ...) /** * Print debug messages. * - * The actual channel used to output debug message is platform specific. To - * avoid misformating or truncation, follow these rules of thumb: + * The actual channel used to output debug message is platform specific. To + * avoid misformating or truncation, follow these rules of thumb: * - output whole lines - * - avoid outputing large strings (512 bytes is the current maximum length + * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ #if !defined(PIPE_OS_HAIKU) static INLINE void +debug_printf(const char *format, ...) _util_printf_format(1,2); + +static INLINE void debug_printf(const char *format, ...) { #ifdef DEBUG @@ -173,11 +181,14 @@ void _debug_assert_fail(const char *expr, * * Do not expect that the assert call terminates -- errors must be handled * regardless of assert behavior. 
+ * + * For non debug builds the assert macro will expand to a no-op, so do not + * call functions with side effects in the assert expression. */ #ifdef DEBUG #define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) #else -#define debug_assert(expr) ((void)(expr)) +#define debug_assert(expr) ((void)0) #endif @@ -340,17 +351,6 @@ void debug_memory_end(unsigned long beginning); -#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -void -debug_profile_start(void); - -void -debug_profile_stop(void); - -#endif - - #ifdef DEBUG struct pipe_surface; struct pipe_transfer; diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c index 6bdecde048..09866880ae 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ b/src/gallium/auxiliary/util/u_debug_dump.c @@ -187,3 +187,83 @@ debug_dump_func_short_names[] = { }; DEFINE_DEBUG_DUMP_CONTINUOUS(func) + + +static const char * +debug_dump_tex_target_names[] = { + "PIPE_TEXTURE_1D", + "PIPE_TEXTURE_2D", + "PIPE_TEXTURE_3D", + "PIPE_TEXTURE_CUBE" +}; + +static const char * +debug_dump_tex_target_short_names[] = { + "1d", + "2d", + "3d", + "cube" +}; + +DEFINE_DEBUG_DUMP_CONTINUOUS(tex_target) + + +static const char * +debug_dump_tex_wrap_names[] = { + "PIPE_TEX_WRAP_REPEAT", + "PIPE_TEX_WRAP_CLAMP", + "PIPE_TEX_WRAP_CLAMP_TO_EDGE", + "PIPE_TEX_WRAP_CLAMP_TO_BORDER", + "PIPE_TEX_WRAP_MIRROR_REPEAT", + "PIPE_TEX_WRAP_MIRROR_CLAMP", + "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE", + "PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER" +}; + +static const char * +debug_dump_tex_wrap_short_names[] = { + "repeat", + "clamp", + "clamp_to_edge", + "clamp_to_border", + "mirror_repeat", + "mirror_clamp", + "mirror_clamp_to_edge", + "mirror_clamp_to_border" +}; + +DEFINE_DEBUG_DUMP_CONTINUOUS(tex_wrap) + + +static const char * +debug_dump_tex_mipfilter_names[] = { + "PIPE_TEX_MIPFILTER_NEAREST", + "PIPE_TEX_MIPFILTER_LINEAR", + "PIPE_TEX_MIPFILTER_NONE" +}; 
+ +static const char * +debug_dump_tex_mipfilter_short_names[] = { + "nearest", + "linear", + "none" +}; + +DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) + + +static const char * +debug_dump_tex_filter_names[] = { + "PIPE_TEX_FILTER_NEAREST", + "PIPE_TEX_FILTER_LINEAR", + "PIPE_TEX_FILTER_ANISO" +}; + +static const char * +debug_dump_tex_filter_short_names[] = { + "nearest", + "linear", + "aniso" +}; + +DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_debug_dump.h b/src/gallium/auxiliary/util/u_debug_dump.h index 102935559c..19b130ad18 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.h +++ b/src/gallium/auxiliary/util/u_debug_dump.h @@ -54,6 +54,18 @@ debug_dump_blend_func(unsigned value, boolean shortened); const char * debug_dump_func(unsigned value, boolean shortened); +const char * +debug_dump_tex_target(unsigned value, boolean shortened); + +const char * +debug_dump_tex_wrap(unsigned value, boolean shortened); + +const char * +debug_dump_tex_mipfilter(unsigned value, boolean shortened); + +const char * +debug_dump_tex_filter(unsigned value, boolean shortened); + /* FIXME: Move the other debug_dump_xxx functions out of u_debug.h into here. */ diff --git a/src/gallium/auxiliary/util/u_debug_profile.c b/src/gallium/auxiliary/util/u_debug_profile.c deleted file mode 100644 index 6d8b244c3a..0000000000 --- a/src/gallium/auxiliary/util/u_debug_profile.c +++ /dev/null @@ -1,320 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Poor-man profiling. 
- * - * @author José Fonseca <jrfonseca@tungstengraphics.com> - * - * @sa http://blogs.msdn.com/joshpoley/archive/2008/03/12/poor-man-s-profiler.aspx - * @sa http://www.johnpanzer.com/aci_cuj/index.html - */ - -#include "pipe/p_config.h" - -#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -#include <windows.h> -#include <winddi.h> - -#include "util/u_debug.h" -#include "util/u_string.h" - - -#define PROFILE_TABLE_SIZE (1024*1024) -#define FILE_NAME_SIZE 256 - -struct debug_profile_entry -{ - uintptr_t caller; - uintptr_t callee; - uint64_t samples; -}; - -static unsigned long enabled = 0; - -static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof"; -static ULONG_PTR iFile = 0; - -static struct debug_profile_entry *table = NULL; -static unsigned long free_table_entries = 0; -static unsigned long max_table_entries = 0; - -uint64_t start_stamp = 0; -uint64_t end_stamp = 0; - - -static void -debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples) -{ - unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1; - - while(1) { - if(table[hash].caller == 0 && table[hash].callee == 0) { - table[hash].caller = caller; - table[hash].callee = callee; - table[hash].samples = samples; - --free_table_entries; - break; - } - else if(table[hash].caller == caller && table[hash].callee == callee) { - table[hash].samples += samples; - break; - } - else { - ++hash; - } - } -} - - -static uintptr_t caller_stack[1024]; -static unsigned last_caller = 0; - - -static int64_t delta(void) { - int64_t result = end_stamp - start_stamp; - if(result > UINT64_C(0xffffffff)) - result = 0; - return result; -} - - -static void __cdecl -debug_profile_enter(uintptr_t callee) -{ - uintptr_t caller = last_caller ? 
caller_stack[last_caller - 1] : 0; - - if (caller) - debug_profile_entry(caller, 0, delta()); - debug_profile_entry(caller, callee, 1); - caller_stack[last_caller++] = callee; -} - - -static void __cdecl -debug_profile_exit(uintptr_t callee) -{ - debug_profile_entry(callee, 0, delta()); - if(last_caller) - --last_caller; -} - - -/** - * Called at the start of every method or function. - * - * @sa http://msdn.microsoft.com/en-us/library/c63a9b7h.aspx - */ -void __declspec(naked) __cdecl -_penter(void) { - _asm { - push eax - mov eax, [enabled] - test eax, eax - jz skip - - push edx - - rdtsc - mov dword ptr [end_stamp], eax - mov dword ptr [end_stamp+4], edx - - xor eax, eax - mov [enabled], eax - - mov eax, [esp+8] - - push ebx - push ecx - push ebp - push edi - push esi - - push eax - call debug_profile_enter - add esp, 4 - - pop esi - pop edi - pop ebp - pop ecx - pop ebx - - mov eax, 1 - mov [enabled], eax - - rdtsc - mov dword ptr [start_stamp], eax - mov dword ptr [start_stamp+4], edx - - pop edx -skip: - pop eax - ret - } -} - - -/** - * Called at the end of Calls the end of every method or function. - * - * @sa http://msdn.microsoft.com/en-us/library/xc11y76y.aspx - */ -void __declspec(naked) __cdecl -_pexit(void) { - _asm { - push eax - mov eax, [enabled] - test eax, eax - jz skip - - push edx - - rdtsc - mov dword ptr [end_stamp], eax - mov dword ptr [end_stamp+4], edx - - xor eax, eax - mov [enabled], eax - - mov eax, [esp+8] - - push ebx - push ecx - push ebp - push edi - push esi - - push eax - call debug_profile_exit - add esp, 4 - - pop esi - pop edi - pop ebp - pop ecx - pop ebx - - mov eax, 1 - mov [enabled], eax - - rdtsc - mov dword ptr [start_stamp], eax - mov dword ptr [start_stamp+4], edx - - pop edx -skip: - pop eax - ret - } -} - - -/** - * Reference function for calibration. 
- */ -void __declspec(naked) -__debug_profile_reference(void) { - _asm { - call _penter - call _pexit - ret - } -} - - -void -debug_profile_start(void) -{ - WCHAR *p; - - // increment starting from the less significant digit - p = &wFileName[14]; - while(1) { - if(*p == '9') { - *p-- = '0'; - } - else { - *p += 1; - break; - } - } - - table = EngMapFile(wFileName, - PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry), - &iFile); - if(table) { - unsigned i; - - free_table_entries = max_table_entries = PROFILE_TABLE_SIZE; - memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry)); - - table[0].caller = (uintptr_t)&__debug_profile_reference; - table[0].callee = 0; - table[0].samples = 0; - --free_table_entries; - - _asm { - push edx - push eax - - rdtsc - mov dword ptr [start_stamp], eax - mov dword ptr [start_stamp+4], edx - - pop edx - pop eax - } - - last_caller = 0; - - enabled = 1; - - for(i = 0; i < 8; ++i) { - _asm { - call __debug_profile_reference - } - } - } -} - - -void -debug_profile_stop(void) -{ - enabled = 0; - - if(iFile) - EngUnmapFile(iFile); - iFile = 0; - table = NULL; - free_table_entries = max_table_entries = 0; -} - -#endif /* PROFILE */ diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 811931f81b..417d0cf04c 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -214,7 +214,7 @@ debug_symbol_print_imagehlp(const void *addr) HANDLE hProcess; BYTE symbolBuffer[1024]; PIMAGEHLP_SYMBOL pSymbol = (PIMAGEHLP_SYMBOL) symbolBuffer; - DWORD dwDisplacement = 0; // Displacement of the input address, relative to the start of the symbol + DWORD dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ hProcess = GetCurrentProcess(); diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index f1bf94f17d..b9cc2aa716 100644 --- 
a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -97,3 +97,13 @@ PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb +PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv +PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv +PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb +PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb +PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb +PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb +PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 7b5b7fcda5..19b902db98 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -33,10 +33,46 @@ #include "pipe/p_format.h" +/** + * Describe how to best pack/unpack pixels into/from the prescribed format. + * + * These are used for automatic code generation of pixel packing and unpacking + * routines (in compile time, e.g., u_format_access.py, or in runtime, like + * llvmpipe does). + * + * Thumb rule is: if you're not code generating pixel packing/unpacking then + * these are irrelevant for you. + * + * Note that this can be deduced from other values in util_format_description + * structure. This is by design, to make code generation of pixel + * packing/unpacking/sampling routines simple and efficient. + * + * XXX: This should be renamed to something like util_format_pack. + */ enum util_format_layout { + /** + * Single scalar component. 
+ */ UTIL_FORMAT_LAYOUT_SCALAR = 0, + + /** + * One or more components of mixed integer formats, arithmetically encoded + * in a word up to 32bits. + */ UTIL_FORMAT_LAYOUT_ARITH = 1, + + /** + * One or more components, no mixed formats, each with equal power of two + * number of bytes. + */ UTIL_FORMAT_LAYOUT_ARRAY = 2, + + /** + * XXX: Not used yet. These might go away and be replaced by a single entry, + * for formats where multiple pixels have to be + * read in order to determine a single pixel value (i.e., block.width > 1 + * || block.height > 1) + */ UTIL_FORMAT_LAYOUT_YUV = 3, UTIL_FORMAT_LAYOUT_DXT = 4 }; @@ -50,7 +86,7 @@ struct util_format_block /** Block height in pixels */ unsigned height; - /** Block size in bytes */ + /** Block size in bits */ unsigned bits; }; diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 8834568e8e..2cd0f95678 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -44,11 +44,10 @@ def colorspace_map(colorspace): colorspace_channels_map = { - 'rgb': 'rgba', - 'rgba': 'rgba', - 'zs': 'zs', - 'yuv': ['y1', 'y2', 'u', 'v'], - 'dxt': [] + 'rgb': ['r', 'g', 'b', 'a'], + 'srgb': ['sr', 'sg', 'sb', 'a'], + 'zs': ['z', 's'], + 'yuv': ['y', 'u', 'v'], } @@ -94,7 +93,7 @@ def write_format_table(formats): print " {" print " %s," % (format.name,) print " \"%s\"," % (format.name,) - print " {%u, %u, %u}, /* block */" % (format.block_width, format.block_height, format.block_size()) + print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) print " %s," % (layout_map(format.layout),) print " {" for i in range(4): @@ -103,7 +102,7 @@ def write_format_table(formats): sep = "," else: sep = "" - print " {%s, %s, %u}%s /* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, sep, "xyzw"[i]) + print " {%s, %s, %u}%s\t/* %s */" % (kind_map[type.kind], bool_map(type.norm), type.size, 
sep, "xyzw"[i]) print " }," print " {" for i in range(4): @@ -113,10 +112,10 @@ def write_format_table(formats): else: sep = "" try: - comment = layout_channels_map[format.layout][i] - except: + comment = colorspace_channels_map[format.colorspace][i] + except (KeyError, IndexError): comment = 'ignored' - print " %s%s /* %s */" % (swizzle_map[swizzle], sep, comment) + print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) print " }," print " %s," % (colorspace_map(format.colorspace),) print " }," diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index f06c0e463d..83263d9fe6 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -45,6 +45,7 @@ #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" +#include "util/u_math.h" #include "cso_cache/cso_context.h" @@ -995,7 +996,7 @@ reduce_2d(enum pipe_format pformat, { enum dtype datatype; uint comps; - const int bpt = pf_get_size(pformat); + const int bpt = pf_get_blocksize(pformat); const ubyte *srcA, *srcB; ubyte *dst; int row; @@ -1034,7 +1035,7 @@ reduce_3d(enum pipe_format pformat, int dstWidth, int dstHeight, int dstDepth, int dstRowStride, ubyte *dstPtr) { - const int bpt = pf_get_size(pformat); + const int bpt = pf_get_blocksize(pformat); const int border = 0; int img, row; int bytesPerSrcImage, bytesPerDstImage; @@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); 
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1158,8 +1159,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, const uint zslice = 0; uint dstLevel; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(pf_get_blockwidth(pt->format) == 1); + assert(pf_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1203,8 +1204,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_screen *screen = pipe->screen; uint dstLevel, zslice = 0; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(pf_get_blockwidth(pt->format) == 1); + assert(pf_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -1213,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, 
srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1427,6 +1428,7 @@ set_vertex_data(struct gen_mipmap_state *ctx, rz = -1.0f; break; default: + rx = ry = rz = 0.0f; assert(0); } @@ -1515,6 +1517,17 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, uint zslice = 0; uint offset; + /* The texture object should have room for the levels which we're + * about to generate. + */ + assert(lastLevel <= pt->last_level); + + /* If this fails, why are we here? */ + assert(lastLevel > baseLevel); + + assert(filter == PIPE_TEX_FILTER_LINEAR || + filter == PIPE_TEX_FILTER_NEAREST); + /* check if we can render in the texture's format */ if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { @@ -1563,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = surf; - fb.width = pt->width[dstLevel]; - fb.height = pt->height[dstLevel]; + fb.width = u_minify(pt->width0, dstLevel); + fb.height = u_minify(pt->height0, dstLevel); cso_set_framebuffer(ctx->cso, &fb); /* @@ -1585,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, offset = set_vertex_data(ctx, pt->target, face, - (float) pt->width[dstLevel], - (float) pt->height[dstLevel]); + (float) u_minify(pt->width0, dstLevel), + (float) u_minify(pt->height0, dstLevel)); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index 8c2a8f454c..5604e3ac37 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -47,7 +47,7 @@ #include "util/u_hash_table.h" -struct hash_table +struct util_hash_table { struct cso_hash *cso; @@ -61,27 +61,27 @@ struct hash_table }; -struct hash_table_item +struct util_hash_table_item { void *key; void *value; }; -static INLINE struct hash_table_item * -hash_table_item(struct cso_hash_iter iter) +static INLINE struct util_hash_table_item * 
+util_hash_table_item(struct cso_hash_iter iter) { - return (struct hash_table_item *)cso_hash_iter_data(iter); + return (struct util_hash_table_item *)cso_hash_iter_data(iter); } -struct hash_table * -hash_table_create(unsigned (*hash)(void *key), - int (*compare)(void *key1, void *key2)) +struct util_hash_table * +util_hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)) { - struct hash_table *ht; + struct util_hash_table *ht; - ht = MALLOC_STRUCT(hash_table); + ht = MALLOC_STRUCT(util_hash_table); if(!ht) return NULL; @@ -99,16 +99,16 @@ hash_table_create(unsigned (*hash)(void *key), static INLINE struct cso_hash_iter -hash_table_find_iter(struct hash_table *ht, - void *key, - unsigned key_hash) +util_hash_table_find_iter(struct util_hash_table *ht, + void *key, + unsigned key_hash) { struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; iter = cso_hash_find(ht->cso, key_hash); while (!cso_hash_iter_is_null(iter)) { - item = (struct hash_table_item *)cso_hash_iter_data(iter); + item = (struct util_hash_table_item *)cso_hash_iter_data(iter); if (!ht->compare(item->key, key)) break; iter = cso_hash_iter_next(iter); @@ -118,17 +118,17 @@ hash_table_find_iter(struct hash_table *ht, } -static INLINE struct hash_table_item * -hash_table_find_item(struct hash_table *ht, - void *key, - unsigned key_hash) +static INLINE struct util_hash_table_item * +util_hash_table_find_item(struct util_hash_table *ht, + void *key, + unsigned key_hash) { struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; iter = cso_hash_find(ht->cso, key_hash); while (!cso_hash_iter_is_null(iter)) { - item = (struct hash_table_item *)cso_hash_iter_data(iter); + item = (struct util_hash_table_item *)cso_hash_iter_data(iter); if (!ht->compare(item->key, key)) return item; iter = cso_hash_iter_next(iter); @@ -139,12 +139,12 @@ hash_table_find_item(struct hash_table *ht, enum pipe_error 
-hash_table_set(struct hash_table *ht, - void *key, - void *value) +util_hash_table_set(struct util_hash_table *ht, + void *key, + void *value) { unsigned key_hash; - struct hash_table_item *item; + struct util_hash_table_item *item; struct cso_hash_iter iter; assert(ht); @@ -153,14 +153,14 @@ hash_table_set(struct hash_table *ht, key_hash = ht->hash(key); - item = hash_table_find_item(ht, key, key_hash); + item = util_hash_table_find_item(ht, key, key_hash); if(item) { /* TODO: key/value destruction? */ item->value = value; return PIPE_OK; } - item = MALLOC_STRUCT(hash_table_item); + item = MALLOC_STRUCT(util_hash_table_item); if(!item) return PIPE_ERROR_OUT_OF_MEMORY; @@ -178,11 +178,11 @@ hash_table_set(struct hash_table *ht, void * -hash_table_get(struct hash_table *ht, - void *key) +util_hash_table_get(struct util_hash_table *ht, + void *key) { unsigned key_hash; - struct hash_table_item *item; + struct util_hash_table_item *item; assert(ht); if (!ht) @@ -190,7 +190,7 @@ hash_table_get(struct hash_table *ht, key_hash = ht->hash(key); - item = hash_table_find_item(ht, key, key_hash); + item = util_hash_table_find_item(ht, key, key_hash); if(!item) return NULL; @@ -199,12 +199,12 @@ hash_table_get(struct hash_table *ht, void -hash_table_remove(struct hash_table *ht, - void *key) +util_hash_table_remove(struct util_hash_table *ht, + void *key) { unsigned key_hash; struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; assert(ht); if (!ht) @@ -212,11 +212,11 @@ hash_table_remove(struct hash_table *ht, key_hash = ht->hash(key); - iter = hash_table_find_iter(ht, key, key_hash); + iter = util_hash_table_find_iter(ht, key, key_hash); if(cso_hash_iter_is_null(iter)) return; - item = hash_table_item(iter); + item = util_hash_table_item(iter); assert(item); FREE(item); @@ -225,10 +225,10 @@ hash_table_remove(struct hash_table *ht, void -hash_table_clear(struct hash_table *ht) +util_hash_table_clear(struct util_hash_table *ht) { 
struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; assert(ht); if (!ht) @@ -236,7 +236,7 @@ hash_table_clear(struct hash_table *ht) iter = cso_hash_first_node(ht->cso); while (!cso_hash_iter_is_null(iter)) { - item = (struct hash_table_item *)cso_hash_take(ht->cso, cso_hash_iter_key(iter)); + item = (struct util_hash_table_item *)cso_hash_take(ht->cso, cso_hash_iter_key(iter)); FREE(item); iter = cso_hash_first_node(ht->cso); } @@ -244,12 +244,13 @@ hash_table_clear(struct hash_table *ht) enum pipe_error -hash_table_foreach(struct hash_table *ht, - enum pipe_error (*callback)(void *key, void *value, void *data), - void *data) +util_hash_table_foreach(struct util_hash_table *ht, + enum pipe_error (*callback) + (void *key, void *value, void *data), + void *data) { struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; enum pipe_error result; assert(ht); @@ -258,7 +259,7 @@ hash_table_foreach(struct hash_table *ht, iter = cso_hash_first_node(ht->cso); while (!cso_hash_iter_is_null(iter)) { - item = (struct hash_table_item *)cso_hash_iter_data(iter); + item = (struct util_hash_table_item *)cso_hash_iter_data(iter); result = callback(item->key, item->value, data); if(result != PIPE_OK) return result; @@ -270,10 +271,10 @@ hash_table_foreach(struct hash_table *ht, void -hash_table_destroy(struct hash_table *ht) +util_hash_table_destroy(struct util_hash_table *ht) { struct cso_hash_iter iter; - struct hash_table_item *item; + struct util_hash_table_item *item; assert(ht); if (!ht) @@ -281,7 +282,7 @@ hash_table_destroy(struct hash_table *ht) iter = cso_hash_first_node(ht->cso); while (!cso_hash_iter_is_null(iter)) { - item = (struct hash_table_item *)cso_hash_iter_data(iter); + item = (struct util_hash_table_item *)cso_hash_iter_data(iter); FREE(item); iter = cso_hash_iter_next(iter); } diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h 
index feee881582..51ec10a804 100644 --- a/src/gallium/auxiliary/util/u_hash_table.h +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -35,7 +35,7 @@ #define U_HASH_TABLE_H_ -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #ifdef __cplusplus @@ -46,7 +46,7 @@ extern "C" { /** * Generic purpose hash table. */ -struct hash_table; +struct util_hash_table; /** @@ -55,37 +55,38 @@ struct hash_table; * @param hash hash function * @param compare should return 0 for two equal keys. */ -struct hash_table * -hash_table_create(unsigned (*hash)(void *key), - int (*compare)(void *key1, void *key2)); +struct util_hash_table * +util_hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)); enum pipe_error -hash_table_set(struct hash_table *ht, - void *key, - void *value); +util_hash_table_set(struct util_hash_table *ht, + void *key, + void *value); void * -hash_table_get(struct hash_table *ht, - void *key); +util_hash_table_get(struct util_hash_table *ht, + void *key); void -hash_table_remove(struct hash_table *ht, - void *key); +util_hash_table_remove(struct util_hash_table *ht, + void *key); void -hash_table_clear(struct hash_table *ht); +util_hash_table_clear(struct util_hash_table *ht); enum pipe_error -hash_table_foreach(struct hash_table *ht, - enum pipe_error (*callback)(void *key, void *value, void *data), - void *data); +util_hash_table_foreach(struct util_hash_table *ht, + enum pipe_error (*callback) + (void *key, void *value, void *data), + void *data); void -hash_table_destroy(struct hash_table *ht); +util_hash_table_destroy(struct util_hash_table *ht); #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c index 508a2ee063..c4b9eb3d9b 100644 --- a/src/gallium/auxiliary/util/u_keymap.c +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -28,7 +28,7 @@ /** * Key lookup/associative container. * - * Like Jose's u_hash_table, based on CSO cache code for now. 
+ * Like Jose's util_hash_table, based on CSO cache code for now. * * Author: Brian Paul */ @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "cso_cache/cso_hash.h" diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c index a1dce3f5cf..f1aef21677 100644 --- a/src/gallium/auxiliary/util/u_linear.c +++ b/src/gallium/auxiliary/util/u_linear.c @@ -82,7 +82,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y) { diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h index b74308ffa3..42c40b2aa7 100644 --- a/src/gallium/auxiliary/util/u_linear.h +++ b/src/gallium/auxiliary/util/u_linear.h @@ -35,6 +35,19 @@ #include "pipe/p_format.h" +struct u_linear_format_block +{ + /** Block size in bytes */ + unsigned size; + + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; +}; + + struct pipe_tile_info { unsigned size; @@ -49,10 +62,10 @@ struct pipe_tile_info unsigned rows; /* Describe the tile in pixels */ - struct pipe_format_block tile; + struct u_linear_format_block tile; /* Describe each block within the tile */ - struct pipe_format_block block; + struct u_linear_format_block block; }; void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, @@ -71,7 +84,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, * @tiles_y number of tiles in y axis */ void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y); diff --git 
a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b428dc544c..b76592d1ec 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -283,6 +283,14 @@ util_fast_pow(float x, float y) return util_fast_exp2(util_fast_log2(x) * y); } +/* Note that this counts zero as a power of two. + */ +static INLINE boolean +util_is_power_of_two( unsigned v ) +{ + return (v & (v-1)) == 0; +} + /** * Floor(x), returned as int. @@ -463,6 +471,67 @@ util_logbase2(unsigned n) /** + * Returns the smallest power of two >= x + */ +static INLINE unsigned +util_next_power_of_two(unsigned x) +{ + unsigned i; + + if (x == 0) + return 1; + + --x; + + for (i = 1; i < sizeof(unsigned) * 8; i <<= 1) + x |= x >> i; + + return x + 1; +} + + +/** + * Return number of bits set in n. + */ +static INLINE unsigned +util_bitcount(unsigned n) +{ +#if defined(PIPE_CC_GCC) + return __builtin_popcount(n); +#else + /* K&R classic bitcount. + * + * For each iteration, clear the LSB from the bitfield. + * Requires only one iteration per set bit, instead of + * one iteration per bit less than highest set bit. + */ + unsigned bits = 0; + for (bits; n; bits++) { + n &= n - 1; + } + return bits; +#endif +} + + +/** + * Reverse byte order of a 32 bit word. + */ +static INLINE uint32_t +util_bswap32(uint32_t n) +{ +#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 403) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + + +/** * Clamp X to [MIN, MAX]. * This is a macro to allow float, int, uint, etc. types. */ @@ -471,6 +540,9 @@ util_logbase2(unsigned n) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? 
(A) : (B) ) +#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C ) +#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) + static INLINE int align(int value, int alignment) @@ -479,9 +551,9 @@ align(int value, int alignment) } static INLINE unsigned -minify(unsigned value) +u_minify(unsigned value, unsigned levels) { - return MAX2(1, value >> 1); + return MAX2(1, value >> levels); } #ifndef COPY_4V diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index 4b75d4ba1d..82f83702d1 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -39,13 +39,20 @@ u_mmDumpMemInfo(const struct mem_block *heap) } else { const struct mem_block *p; + int total_used = 0, total_free = 0; for (p = heap->next; p != heap; p = p->next) { debug_printf(" Offset:%08x, Size:%08x, %c%c\n", p->ofs, p->size, p->free ? 'F':'.', p->reserved ? 'R':'.'); + if (p->free) + total_free += p->size; + else + total_used += p->size; } + debug_printf("'\nMemory stats: total = %d, used = %d, free = %d\n", + total_used + total_free, total_used, total_free); debug_printf("\nFree list:\n"); for (p = heap->next_free; p != heap; p = p->next_free) { diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index bc4b758406..6269c72e12 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -169,7 +169,7 @@ 
u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h index 8c778f492c..0aa898b967 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index eda883b3b9..a2e0f26686 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -40,101 +40,97 @@ #include "util/u_math.h" + +union util_color { + ubyte ub; + ushort us; + uint ui; + float f[4]; +}; + /** * Pack ubyte R,G,B,A into dest pixel. 
*/ static INLINE void util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, - enum pipe_format format, void *dest) + enum pipe_format format, union util_color *uc) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case 
PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = r; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; - d[3] = (float)a / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; + uc->f[3] = (float)a / 255.0f; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; } return; /* XXX lots more cases to add */ default: + uc->ui = 0; /* keep compiler happy */ debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); assert(0); } @@ -145,13 +141,13 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255]. */ static INLINE void -util_unpack_color_ub(enum pipe_format format, const void *src, +util_unpack_color_ub(enum pipe_format format, union util_color *uc, ubyte *r, ubyte *g, ubyte *b, ubyte *a) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -160,7 +156,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -169,7 +165,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -178,7 +174,7 @@ util_unpack_color_ub(enum
pipe_format format, const void *src, return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -187,7 +183,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -196,7 +192,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -205,7 +201,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7)); *g = (ubyte) (((p >> 3) & 0xfc) | ((p >> 9) & 0x3)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -214,7 +210,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -223,7 +219,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 4) & 0xf0) | ((p >> 8) & 0xf)); *g = (ubyte) (((p >> 0) & 0xf0) | ((p >> 4) & 0xf)); *b = (ubyte) (((p << 4) & 0xf0) | ((p >> 0) & 0xf)); @@ -232,27 +228,27 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = 
uc->ub; *r = *g = *b = (ubyte) 0xff; *a = p; } return; case PIPE_FORMAT_L8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = p; *a = (ubyte) 0xff; } return; case PIPE_FORMAT_I8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = *a = p; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -261,7 +257,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R32G32B32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -271,7 +267,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32G32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = *a = (ubyte) 0xff; @@ -280,7 +276,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = *b = *a = (ubyte) 0xff; } @@ -293,16 +289,18 @@ util_unpack_color_ub(enum pipe_format format, const void *src, assert(0); } } - /** * Note rgba outside [0,1] will be clamped for int pixel formats. 
*/ static INLINE void -util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc) { - ubyte r, g, b, a; + ubyte r = 0; + ubyte g = 0; + ubyte b = 0; + ubyte a = 0; if (pf_size_x(format) <= 8) { /* format uses 8-bit components or less */ @@ -315,90 +313,78 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 
4) | ((g & 0xf0) << 0) | (b >> 4); + uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = r; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; - d[3] = rgba[3]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; + uc->f[3] = rgba[3]; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; } return; /* XXX lots more cases to add */ default: + uc->ui = 0; /* keep compiler happy */ debug_print_format("gallium: unhandled format in util_pack_color()", format); assert(0); } diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 9866b6fc8a..72725b59d2 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -44,7 +44,7 @@ */ void util_copy_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -57,27 +57,30 @@ util_copy_rect(ubyte * dst, { unsigned i; int src_stride_pos = src_stride < 0 ?
-src_stride : src_stride; + int blocksize = pf_get_blocksize(format); + int blockwidth = pf_get_blockwidth(format); + int blockheight = pf_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(src_x >= 0); assert(src_y >= 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; - src_x /= block->width; - src_y /= block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; + src_x /= blockwidth; + src_y /= blockheight; - dst += dst_x * block->size; - src += src_x * block->size; + dst += dst_x * blocksize; + src += src_x * blocksize; dst += dst_y * dst_stride; src += src_y * src_stride_pos; - width *= block->size; + width *= blocksize; if (width == dst_stride && width == src_stride) memcpy(dst, src, height * width); @@ -92,7 +95,7 @@ util_copy_rect(ubyte * dst, void util_fill_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -102,23 +105,26 @@ util_fill_rect(ubyte * dst, { unsigned i, j; unsigned width_size; + int blocksize = pf_get_blocksize(format); + int blockwidth = pf_get_blockwidth(format); + int blockheight = pf_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = 
(height + blockheight - 1)/blockheight; - dst += dst_x * block->size; + dst += dst_x * blocksize; dst += dst_y * dst_stride; - width_size = width * block->size; + width_size = width * blocksize; - switch (block->size) { + switch (blocksize) { case 1: if(dst_stride == width_size) memset(dst, (ubyte) value, height * width_size); @@ -172,10 +178,15 @@ util_surface_copy(struct pipe_context *pipe, struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; + enum pipe_format src_format, dst_format; assert(src->texture && dst->texture); if (!src->texture || !dst->texture) return; + + src_format = src->texture->format; + dst_format = dst->texture->format; + src_trans = screen->get_tex_transfer(screen, src->texture, src->face, @@ -192,9 +203,9 @@ util_surface_copy(struct pipe_context *pipe, PIPE_TRANSFER_WRITE, dst_x, dst_y, w, h); - assert(dst_trans->block.size == src_trans->block.size); - assert(dst_trans->block.width == src_trans->block.width); - assert(dst_trans->block.height == src_trans->block.height); + assert(pf_get_blocksize(dst_format) == pf_get_blocksize(src_format)); + assert(pf_get_blockwidth(dst_format) == pf_get_blockwidth(src_format)); + assert(pf_get_blockheight(dst_format) == pf_get_blockheight(src_format)); src_map = pipe->screen->transfer_map(screen, src_trans); dst_map = pipe->screen->transfer_map(screen, dst_trans); @@ -205,7 +216,7 @@ util_surface_copy(struct pipe_context *pipe, if (src_map && dst_map) { /* If do_flip, invert src_y position and pass negative src stride */ util_copy_rect(dst_map, - &dst_trans->block, + dst_format, dst_trans->stride, 0, 0, w, h, @@ -259,11 +270,11 @@ util_surface_fill(struct pipe_context *pipe, if (dst_map) { assert(dst_trans->stride > 0); - switch (dst_trans->block.size) { + switch (pf_get_blocksize(dst_trans->texture->format)) { case 1: case 2: case 4: - util_fill_rect(dst_map, &dst_trans->block, dst_trans->stride, + util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride, 0, 0, 
width, height, value); break; case 8: diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index daa50834d3..5e444ffae2 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -42,13 +42,13 @@ struct pipe_surface; extern void -util_copy_rect(ubyte * dst, const struct pipe_format_block *block, +util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, int src_stride, unsigned src_x, int src_y); extern void -util_fill_rect(ubyte * dst, const struct pipe_format_block *block, +util_fill_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, uint32_t value); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 0d706f9449..1c8b157d91 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -108,7 +108,15 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, TGSI_SEMANTIC_COLOR, 0 ); - ureg_TEX( ureg, out, TGSI_TEXTURE_2D, tex, sampler ); + if (writemask != TGSI_WRITEMASK_XYZW) { + struct ureg_src imm = ureg_imm4f( ureg, 0, 0, 0, 1 ); + + ureg_MOV( ureg, out, imm ); + } + + ureg_TEX( ureg, + ureg_writemask(out, writemask), + TGSI_TEXTURE_2D, tex, sampler ); ureg_END( ureg ); return ureg_create_shader_and_destroy( ureg, pipe ); diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e..f828908f0b 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -79,10 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.target = target; templ.format = format; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; - pf_get_block(format, &templ.block); + 
templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.tex_usage = usage; *textureOut = screen->texture_create(screen, &templ); diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 0d6489c26e..88c9a1f097 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -52,7 +52,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, const void *src; if (dst_stride == 0) - dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + dst_stride = pf_get_stride(pt->texture->format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -62,7 +62,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, if(!src) return; - util_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y); + util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); screen->transfer_unmap(screen, pt); } @@ -78,9 +78,10 @@ pipe_put_tile_raw(struct pipe_transfer *pt, { struct pipe_screen *screen = pt->texture->screen; void *dst; + enum pipe_format format = pt->texture->format; if (src_stride == 0) - src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + src_stride = pf_get_stride(format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -90,7 +91,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt, if(!dst) return; - util_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0); + util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); screen->transfer_unmap(screen, pt); } @@ -246,6 +247,53 @@ b8g8r8a8_put_tile_rgba(unsigned *dst, } +/*** PIPE_FORMAT_R8G8B8A8_UNORM ***/ + +static void +r8g8b8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + 
pRow[2] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +r8g8b8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (r << 24) | (g << 16) | (b << 8) | a; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ static void @@ -1143,6 +1191,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + r8g8b8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_A1R5G5B5_UNORM: a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; @@ -1219,21 +1270,22 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, { unsigned dst_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format)); if (!packed) return; - if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) + if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV) assert((x & 1) == 0); pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride); + pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); } @@ -1246,16 +1298,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, { unsigned src_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = 
MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format)); if (!packed) return; - switch (pt->format) { + switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; @@ -1265,6 +1318,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + r8g8b8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_A1R5G5B5_UNORM: a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; @@ -1274,9 +1330,6 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R8G8B8_UNORM: r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(0); - break; case PIPE_FORMAT_A4R4G4B4_UNORM: a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; @@ -1322,7 +1375,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); + debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); @@ -1344,6 +1397,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, ubyte *map; uint *pDest = z; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1354,7 +1408,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { const uint *ptrc @@ -1428,6 +1482,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, const uint *ptrc = zSrc; ubyte *map; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1438,7 +1493,7 @@ pipe_put_tile_z(struct 
pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); @@ -1452,7 +1507,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, case PIPE_FORMAT_S8Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert(pt->usage == PIPE_TRANSFER_READ_WRITE); + assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -1479,7 +1534,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, case PIPE_FORMAT_Z24S8_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert(pt->usage == PIPE_TRANSFER_READ_WRITE); + assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index eb635c9f14..975ee89c45 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -29,7 +29,7 @@ * coalescing small buffers into larger ones. */ -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile new file mode 100644 index 0000000000..4314c1e8d6 --- /dev/null +++ b/src/gallium/auxiliary/vl/Makefile @@ -0,0 +1,13 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = vl + +C_SOURCES = \ + vl_bitstream_parser.c \ + vl_mpeg12_mc_renderer.c \ + vl_compositor.c \ + vl_csc.c \ + vl_shader_build.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript new file mode 100644 index 0000000000..aed69f5efe --- /dev/null +++ b/src/gallium/auxiliary/vl/SConscript @@ -0,0 +1,13 @@ +Import('*') + +vl = env.ConvenienceLibrary( + target = 'vl', + source = [ + 'vl_bitstream_parser.c', + 'vl_mpeg12_mc_renderer.c', + 'vl_compositor.c', + 'vl_csc.c', + 'vl_shader_build.c', + ]) + +auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c new file mode 100644 index 0000000000..3193ea5f41 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c @@ -0,0 +1,167 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vl_bitstream_parser.h" +#include <assert.h> +#include <limits.h> +#include <util/u_memory.h> + +static unsigned +grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt) +{ + unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor; + + assert(cursor < sizeof(unsigned) * CHAR_BIT); + assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT); + assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT); + + return (bitstream_elt << excess_bits) >> (excess_bits + cursor); +} + +static unsigned +show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream) +{ + unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT); + unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT); + + assert(bitstream); + + if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) { + unsigned lower = grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, + bitstream[cur_int]); + unsigned upper = grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, + bitstream[cur_int + 1]); + return lower | upper << (sizeof(unsigned) * CHAR_BIT - cur_bit); + } + else + return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]); +} + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes) +{ + assert(parser); + assert(num_bitstreams); + assert(bitstreams); + assert(sizes); + + parser->num_bitstreams = num_bitstreams; + parser->bitstreams = (const unsigned**)bitstreams; + parser->sizes = sizes; + parser->cur_bitstream = 0; + parser->cursor = 0; + + return 
true; +} + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser) +{ + assert(parser); +} + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits; + + assert(parser); + + bits = vl_bitstream_parser_show_bits(parser, how_many_bits); + + vl_bitstream_parser_forward(parser, how_many_bits); + + return bits; +} + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits = 0; + unsigned shift = 0; + unsigned cursor; + unsigned cur_bitstream; + + assert(parser); + + cursor = parser->cursor; + cur_bitstream = parser->cur_bitstream; + + while (1) { + unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor; + unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits; + + bits |= show_bits(cursor, bits_to_show, + parser->bitstreams[cur_bitstream]) << shift; + + if (how_many_bits > bits_to_show) { + how_many_bits -= bits_to_show; + cursor = 0; + ++cur_bitstream; + shift += bits_to_show; + } + else + break; + } + + return bits; +} + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + assert(parser); + assert(how_many_bits); + + parser->cursor += how_many_bits; + + while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) { + parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } +} + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + signed c; + + assert(parser); + assert(how_many_bits); + + c = parser->cursor - how_many_bits; + + while (c < 0) { + c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } + + parser->cursor = (unsigned)c; +} diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h new file 
mode 100644 index 0000000000..30ec743fa7 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef vl_bitstream_parser_h +#define vl_bitstream_parser_h + +#include "pipe/p_compiler.h" + +struct vl_bitstream_parser +{ + unsigned num_bitstreams; + const unsigned **bitstreams; + const unsigned *sizes; + unsigned cur_bitstream; + unsigned cursor; +}; + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes); + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser); + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +#endif /* vl_bitstream_parser_h */ diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c new file mode 100644 index 0000000000..fc2a1c59a6 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -0,0 +1,534 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vl_compositor.h" +#include <assert.h> +#include <pipe/p_context.h> +#include <pipe/p_inlines.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_build.h> +#include <util/u_memory.h> +#include "vl_csc.h" +#include "vl_shader_build.h" + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f dst_scale; + struct vertex4f dst_trans; + struct vertex4f src_scale; + struct vertex4f src_trans; +}; + +struct fragment_shader_consts +{ + float matrix[16]; +}; + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. 
+ */ +static const struct vertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. + */ +static const struct vertex2f *surface_texcoords = surface_verts; + +static void +create_vert_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + unsigned i; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + header = (struct tgsi_header*)&tokens[0]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 2; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to source size + * decl c3 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + */ + for (i = 0; i < 2; ++i) { + inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs); + FREE(tokens); +} + +static void +create_frag_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + unsigned i; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + header = (struct tgsi_header*)&tokens[0]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 2; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0-c3 ; CSC matrix c0-c3 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = 
vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl t0 */
+   decl = vl_decl_temps(0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl s0 ; Sampler for tex containing picture to display */
+   decl = vl_decl_samplers(0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* tex2d t0, i0, s0 ; Read src pixel */
+   inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /*
+    * dp4 o0.x, t0, c0 ; Multiply pixel by the color conversion matrix
+    * dp4 o0.y, t0, c1
+    * dp4 o0.z, t0, c2
+    * dp4 o0.w, t0, c3
+    */
+   for (i = 0; i < 4; ++i) {
+      inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i);
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   fs.tokens = tokens;
+   c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs);
+   FREE(tokens);
+}
+
+/*
+ * Initializes the fixed parts of the framebuffer state (one color buffer,
+ * no depth/stencil buffer) and creates the sampler object stored in
+ * c->sampler: clamp-to-edge wrapping, linear min/mag filtering, no
+ * mipmapping, no depth comparison, normalized coordinates.
+ */
+static bool
+init_pipe_state(struct vl_compositor *c)
+{
+   struct pipe_sampler_state sampler;
+
+   assert(c);
+
+   c->fb_state.nr_cbufs = 1;
+   c->fb_state.zsbuf = NULL;
+
+   /*
+    * Zero the whole template first: the fields that are not assigned below
+    * (see the commented-out list) would otherwise reach the driver with
+    * indeterminate stack contents.
+    */
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   /*sampler.prefilter = ;*/
+   /*sampler.lod_bias = ;*/
+
/*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler); + + return true; +} + +static void cleanup_pipe_state(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_sampler_state(c->pipe, c->sampler); +} + +static bool +init_shaders(struct vl_compositor *c) +{ + assert(c); + + create_vert_shader(c); + create_frag_shader(c); + + return true; +} + +static void cleanup_shaders(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_vs_state(c->pipe, c->vertex_shader); + c->pipe->delete_fs_state(c->pipe, c->fragment_shader); +} + +static bool +init_buffers(struct vl_compositor *c) +{ + struct fragment_shader_consts fsc; + + assert(c); + + /* + * Create our vertex buffer and vertex buffer element + * VB contains 4 vertices that render a quad covering the entire window + * to display a rendered surface + * Quad is rendered as a tri strip + */ + c->vertex_bufs[0].stride = sizeof(struct vertex2f); + c->vertex_bufs[0].max_index = 3; + c->vertex_bufs[0].buffer_offset = 0; + c->vertex_bufs[0].buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); + + c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].vertex_buffer_index = 0; + c->vertex_elems[0].nr_components = 2; + c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our texcoord buffer and texcoord buffer element + * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + c->vertex_bufs[1].stride = sizeof(struct vertex2f); + c->vertex_bufs[1].max_index = 3; + c->vertex_bufs[1].buffer_offset = 0; + c->vertex_bufs[1].buffer = pipe_buffer_create + 
( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); + + c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].vertex_buffer_index = 1; + c->vertex_elems[1].nr_components = 2; + c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our vertex shader's constant buffer + * Const buffer contains scaling and translation vectors + */ + c->vs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex_shader_consts) + ); + + /* + * Create our fragment shader's constant buffer + * Const buffer contains the color conversion matrix and bias vectors + */ + c->fs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + sizeof(struct fragment_shader_consts) + ); + + vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix); + + vl_compositor_set_csc_matrix(c, fsc.matrix); + + return true; +} + +static void +cleanup_buffers(struct vl_compositor *c) +{ + unsigned i; + + assert(c); + + for (i = 0; i < 2; ++i) + pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); + + pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); +} + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) +{ + assert(compositor); + + memset(compositor, 0, sizeof(struct vl_compositor)); + + compositor->pipe = pipe; + + if (!init_pipe_state(compositor)) + return false; + if (!init_shaders(compositor)) { + cleanup_pipe_state(compositor); + return false; + } + if (!init_buffers(compositor)) { + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); + return false; + } + + return true; +} + 
+void vl_compositor_cleanup(struct vl_compositor *compositor) +{ + assert(compositor); + + cleanup_buffers(compositor); + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); +} + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct vertex_shader_consts *vs_consts; + + assert(compositor); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); + + compositor->fb_state.width = dst_surface->width0; + compositor->fb_state.height = dst_surface->height0; + compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface + ( + compositor->pipe->screen, + dst_surface, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + compositor->viewport.scale[0] = compositor->fb_state.width; + compositor->viewport.scale[1] = compositor->fb_state.height; + compositor->viewport.scale[2] = 1; + compositor->viewport.scale[3] = 1; + compositor->viewport.translate[0] = 0; + compositor->viewport.translate[1] = 0; + compositor->viewport.translate[2] = 0; + compositor->viewport.translate[3] = 0; + + compositor->scissor.maxx = compositor->fb_state.width; + compositor->scissor.maxy = compositor->fb_state.height; + + compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state); + compositor->pipe->set_viewport_state(compositor->pipe, 
&compositor->viewport); + compositor->pipe->set_scissor_state(compositor->pipe, &compositor->scissor); + compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler); + compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface); + compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); + compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); + compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); + compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + + vs_consts = pipe_buffer_map + ( + compositor->pipe->screen, + compositor->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_scale.z = 1; + vs_consts->dst_scale.w = 1; + vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_trans.z = 0; + vs_consts->dst_trans.w = 0; + + vs_consts->src_scale.x = src_area->w / (float)src_surface->width0; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height0; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width0; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height0; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + + 
compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); + + pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL); +} + +void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat) +{ + assert(compositor); + + memcpy + ( + pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + mat, + sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer); +} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h new file mode 100644 index 0000000000..f441901a75 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -0,0 +1,77 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_compositor_h +#define vl_compositor_h + +#include <pipe/p_compiler.h> +#include <pipe/p_state.h> +#include <pipe/p_video_state.h> + +struct pipe_context; +struct pipe_texture; + +struct vl_compositor +{ + struct pipe_context *pipe; + + struct pipe_framebuffer_state fb_state; + void *sampler; + void *vertex_shader; + void *fragment_shader; + struct pipe_viewport_state viewport; + struct pipe_scissor_state scissor; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); + +void vl_compositor_cleanup(struct vl_compositor *compositor); + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas,*/ + struct pipe_fence_handle **fence); + +void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat); + +#endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c new file mode 100644 index 
0000000000..5ecc43a5fa --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.c @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "vl_csc.h" +#include <util/u_math.h> +#include <util/u_debug.h> + +/* + * Color space conversion formulas + * + * To convert YCbCr to RGB, + * vec4 ycbcr, rgb + * mat44 csc + * rgb = csc * ycbcr + * + * To calculate the color space conversion matrix csc with ProcAmp adjustments, + * mat44 csc, cstd, procamp, bias + * csc = cstd * (procamp * bias) + * + * Where cstd is a matrix corresponding to one of the color standards (BT.601, BT.709, etc) + * adjusted for the kind of YCbCr -> RGB mapping wanted (1:1, full), + * bias is a matrix corresponding to the kind of YCbCr -> RGB mapping wanted (1:1, full) + * + * To calculate procamp, + * mat44 procamp, hue, saturation, brightness, contrast + * procamp = brightness * (saturation * (contrast * hue)) + * Alternatively, + * procamp = saturation * (brightness * (contrast * hue)) + * + * contrast + * [ c, 0, 0, 0] + * [ 0, c, 0, 0] + * [ 0, 0, c, 0] + * [ 0, 0, 0, 1] + * + * brightness + * [ 1, 0, 0, b] + * [ 0, 1, 0, 0] + * [ 0, 0, 1, 0] + * [ 0, 0, 0, 1] + * + * saturation + * [ 1, 0, 0, 0] + * [ 0, s, 0, 0] + * [ 0, 0, s, 0] + * [ 0, 0, 0, 1] + * + * hue + * [ 1, 0, 0, 0] + * [ 0, cos(h), sin(h), 0] + * [ 0, -sin(h), cos(h), 0] + * [ 0, 0, 0, 1] + * + * procamp + * [ c, 0, 0, b] + * [ 0, c*s*cos(h), c*s*sin(h), 0] + * [ 0, -c*s*sin(h), c*s*cos(h), 0] + * [ 0, 0, 0, 1] + * + * bias + * [ 1, 0, 0, ybias] + * [ 0, 1, 0, cbbias] + * [ 0, 0, 1, crbias] + * [ 0, 0, 0, 1] + * + * csc + * [ c*cstd[ 0], c*cstd[ 1]*s*cos(h) - c*cstd[ 2]*s*sin(h), c*cstd[ 2]*s*cos(h) + c*cstd[ 1]*s*sin(h), cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 2]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[ 4], c*cstd[ 5]*s*cos(h) - c*cstd[ 6]*s*sin(h), c*cstd[ 6]*s*cos(h) + c*cstd[ 5]*s*sin(h), cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 6]*(c*crbias*s*cos(h) - 
c*cbbias*s*sin(h))] + * [ c*cstd[ 8], c*cstd[ 9]*s*cos(h) - c*cstd[10]*s*sin(h), c*cstd[10]*s*cos(h) + c*cstd[ 9]*s*sin(h), cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[10]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[12], c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h), c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h), cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + */ + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_601[16] = +{ + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_601_full[16] = +{ + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_709[16] = +{ + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_709_full[16] = +{ + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +static const float identity[16] = +{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + 
float *matrix)
+{
+   /*
+    * Bias terms folded into the fourth column of the result; the luma bias
+    * is only applied when full_range is set.
+    */
+   float ybias = full_range ? -16.0f/255.0f : 0.0f;
+   float cbbias = -128.0f/255.0f;
+   float crbias = -128.0f/255.0f;
+   /* A NULL procamp selects neutral adjustments. */
+   float c = procamp ? procamp->contrast : 1.0f;
+   float s = procamp ? procamp->saturation : 1.0f;
+   float b = procamp ? procamp->brightness : 0.0f;
+   float h = procamp ? procamp->hue : 0.0f;
+   const float *cstd;
+
+   assert(matrix);
+
+   switch (cs) {
+      case VL_CSC_COLOR_STANDARD_BT_601:
+         cstd = full_range ? &bt_601_full[0] : &bt_601[0];
+         break;
+      case VL_CSC_COLOR_STANDARD_BT_709:
+         cstd = full_range ? &bt_709_full[0] : &bt_709[0];
+         break;
+      case VL_CSC_COLOR_STANDARD_IDENTITY:
+      default:
+         /* Identity ignores procamp and the bias terms entirely. */
+         assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY);
+         memcpy(matrix, &identity[0], sizeof(float) * 16);
+         return;
+   }
+
+   /*
+    * All rows use the single-precision cosf/sinf so every element is
+    * computed in float, matching the float operands and destination.
+    */
+   matrix[ 0] = c*cstd[ 0];
+   matrix[ 1] = c*cstd[ 1]*s*cosf(h) - c*cstd[ 2]*s*sinf(h);
+   matrix[ 2] = c*cstd[ 2]*s*cosf(h) + c*cstd[ 1]*s*sinf(h);
+   matrix[ 3] = cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 2]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+
+   matrix[ 4] = c*cstd[ 4];
+   matrix[ 5] = c*cstd[ 5]*s*cosf(h) - c*cstd[ 6]*s*sinf(h);
+   matrix[ 6] = c*cstd[ 6]*s*cosf(h) + c*cstd[ 5]*s*sinf(h);
+   matrix[ 7] = cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 6]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+
+   matrix[ 8] = c*cstd[ 8];
+   matrix[ 9] = c*cstd[ 9]*s*cosf(h) - c*cstd[10]*s*sinf(h);
+   matrix[10] = c*cstd[10]*s*cosf(h) + c*cstd[ 9]*s*sinf(h);
+   matrix[11] = cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[10]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+
+   matrix[12] = c*cstd[12];
+   matrix[13] = c*cstd[13]*s*cosf(h) - c*cstd[14]*s*sinf(h);
+   matrix[14] = c*cstd[14]*s*cosf(h) + c*cstd[13]*s*sinf(h);
+   matrix[15] = cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[14]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+}
diff --git 
a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h new file mode 100644 index 0000000000..722ca35f33 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef vl_csc_h +#define vl_csc_h + +#include <pipe/p_compiler.h> + +struct vl_procamp +{ + float brightness; + float contrast; + float saturation; + float hue; +}; + +enum VL_CSC_COLOR_STANDARD +{ + VL_CSC_COLOR_STANDARD_IDENTITY, + VL_CSC_COLOR_STANDARD_BT_601, + VL_CSC_COLOR_STANDARD_BT_709 +}; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + float *matrix); + +#endif /* vl_csc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c new file mode 100644 index 0000000000..4952e9c9f8 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -0,0 +1,1653 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vl_mpeg12_mc_renderer.h" +#include <assert.h> +#include <pipe/p_context.h> +#include <pipe/p_inlines.h> +#include <util/u_math.h> +#include <util/u_memory.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_build.h> +#include "vl_shader_build.h" + +#define DEFAULT_BUF_ALIGNMENT 1 +#define MACROBLOCK_WIDTH 16 +#define MACROBLOCK_HEIGHT 16 +#define BLOCK_WIDTH 8 +#define BLOCK_HEIGHT 8 +#define ZERO_BLOCK_NIL -1.0f +#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f) + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f denorm; +}; + +struct fragment_shader_consts +{ + struct vertex4f multiplier; + struct vertex4f div; +}; + +/* + * Multiplier renormalizes block samples from 16 bits to 12 bits. + * Divider is used when calculating Y % 2 for choosing top or bottom + * field for P or B macroblocks. + * TODO: Use immediates. 
+ */
+static const struct fragment_shader_consts fs_consts = {
+   {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
+   {0.5f, 2.0f, 0.0f, 0.0f}
+};
+
+/* One vertex of vertex buffer 0: position followed by one texcoord per plane. */
+struct vert_stream_0
+{
+   struct vertex2f pos;
+   struct vertex2f luma_tc;
+   struct vertex2f cb_tc;
+   struct vertex2f cr_tc;
+};
+
+/*
+ * Rendering category of a macroblock. gen_macroblock_stream() lays vertices
+ * out grouped by category, in this enum order.
+ */
+enum MACROBLOCK_TYPE
+{
+   MACROBLOCK_TYPE_INTRA,
+   MACROBLOCK_TYPE_FWD_FRAME_PRED,
+   MACROBLOCK_TYPE_FWD_FIELD_PRED,
+   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
+   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
+   MACROBLOCK_TYPE_BI_FRAME_PRED,
+   MACROBLOCK_TYPE_BI_FIELD_PRED,
+
+   NUM_MACROBLOCK_TYPES
+};
+
+/*
+ * Builds the pass-through vertex shader used for intra macroblocks and
+ * stores the driver handle in r->i_vs.
+ * NOTE(review): here and in the other shader builders in this file the
+ * malloc() result is used without a NULL check.
+ */
+static void
+create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 50;
+
+   struct pipe_shader_state vs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+   /* Tokens 0 and 1 hold the header and processor; the program starts at 2 */
+   ti = 2;
+
+   /*
+    * decl i0 ; Vertex pos
+    * decl i1 ; Luma texcoords
+    * decl i2 ; Chroma Cb texcoords
+    * decl i3 ; Chroma Cr texcoords
+    */
+   for (i = 0; i < 4; i++) {
+      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * decl o0 ; Vertex pos
+    * decl o1 ; Luma texcoords
+    * decl o2 ; Chroma Cb texcoords
+    * decl o3 ; Chroma Cr texcoords
+    */
+   for (i = 0; i < 4; i++) {
+      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * mov o0, i0 ; Move input vertex pos to output
+    * mov o1, i1 ; Move input luma texcoords to output
+    * mov o2, i2 ; Move input chroma Cb texcoords to output
+    * mov o3, i3 ; Move input chroma Cr texcoords to output
+    */
+   for (i = 0; i < 4; ++i) {
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   vs.tokens = tokens;
+   r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
+   free(tokens);
+}
+
+/*
+ * Builds the intra fragment shader: samples the three planes, packs them
+ * into one vector and rescales by c0. The handle is stored in r->i_fs.
+ */
+static void
+create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 100;
+
+   struct pipe_shader_state fs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+   ti = 2;
+
+   /*
+    * decl i0 ; Luma texcoords
+    * decl i1 ; Chroma Cb texcoords
+    * decl i2 ; Chroma Cr texcoords
+    */
+   for (i = 0; i < 3; ++i) {
+      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl o0 ; Fragment color */
+   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl t0, t1 */
+   decl = vl_decl_temps(0, 1);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /*
+    * decl s0 ; Sampler for luma texture
+    * decl s1 ; Sampler for chroma Cb texture
+    * decl s2 ; Sampler for chroma Cr texture
+    */
+   for (i = 0; i < 3; ++i) {
+      decl = vl_decl_samplers(i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * tex2d t1, i0, s0 ; Read texel from luma texture
+    * mov t0.x, t1.x ; Move luma sample into .x component
+    * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+    * mov t0.y, t1.x ; Move Cb sample into .y component
+    * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+    * mov t0.z, t1.x ; Move Cr sample into .z component
+    */
+   for (i = 0; i < 3; ++i) {
+      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+      /* Write only the component that belongs to plane i */
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* mul o0, t0, c0 ; Rescale texel to correct range */
+   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   fs.tokens = tokens;
+   r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
+   free(tokens);
+}
+
+/*
+ * Builds the vertex shader for single-reference frame prediction; adds the
+ * motion vector input to the vertex position to form the reference
+ * texcoords. The handle is stored in r->p_vs[0].
+ */
+static void
+create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 100;
+
+   struct pipe_shader_state vs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+   ti = 2;
+
+   /*
+    * decl i0 ; Vertex pos
+    * decl i1 ; Luma texcoords
+    * decl i2 ; Chroma Cb texcoords
+    * decl i3 ; Chroma Cr texcoords
+    * decl i4 ; Ref surface top field texcoords
+    * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
+    */
+   for (i = 0; i < 6; i++) {
+      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * decl o0 ; Vertex pos
+    * decl o1 ; Luma texcoords
+    * decl o2 ; Chroma Cb texcoords
+    * decl o3 ; Chroma Cr texcoords
+    * decl o4 ; Ref macroblock texcoords
+    */
+   for (i = 0; i < 5; i++) {
+      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * mov o0, i0 ; Move input vertex pos to output
+    * mov o1, i1 ; Move input luma texcoords to output
+    * mov o2, i2 ; Move input chroma Cb texcoords to output
+    * mov o3, i3 ; Move input chroma Cr texcoords to output
+    */
+   for (i = 0; i < 4; ++i) {
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
+   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   vs.tokens = tokens;
+   r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
+   free(tokens);
+}
+
+/* Placeholder: field-prediction vertex shader is not implemented. */
+static void
+create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(false);
+}
+
+/*
+ * Builds the fragment shader for single-reference frame prediction: the
+ * rescaled differential is added to the texel read from the reference
+ * surface. The handle is stored in r->p_fs[0].
+ */
+static void
+create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 100;
+
+   struct pipe_shader_state fs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+   ti = 2;
+
+   /*
+    * decl i0 ; Luma texcoords
+    * decl i1 ; Chroma Cb texcoords
+    * decl i2 ; Chroma Cr texcoords
+    * decl i3 ; Ref macroblock texcoords
+    */
+   for (i = 0; i < 4; ++i) {
+      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl o0 ; Fragment color */
+   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl t0, t1 */
+   decl = vl_decl_temps(0, 1);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /*
+    * decl s0 ; Sampler for luma texture
+    * decl s1 ; Sampler for chroma Cb texture
+    * decl s2 ; Sampler for chroma Cr texture
+    * decl s3 ; Sampler for ref surface texture
+    */
+   for (i = 0; i < 4; ++i) {
+      decl = vl_decl_samplers(i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * tex2d t1, i0, s0 ; Read texel from luma texture
+    * mov t0.x, t1.x ; Move luma sample into .x component
+    * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+    * mov t0.y, t1.x ; Move Cb sample into .y component
+    * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+    * mov t0.z, t1.x ; Move Cr sample into .z component
+    */
+   for (i = 0; i < 3; ++i) {
+      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* mul t0, t0, c0 ; Rescale texel to correct range */
+   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
+   inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* add o0, t0, t1 ; Add ref and differential to form final output */
+   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   fs.tokens = tokens;
+   r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
+   free(tokens);
+}
+
+/* Placeholder: field-prediction fragment shader is not implemented. */
+static void
+create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(false);
+}
+
+/*
+ * Builds the vertex shader for bi-directional frame prediction; emits two
+ * sets of reference texcoords. The handle is stored in r->b_vs[0].
+ */
+static void
+create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 100;
+
+   struct pipe_shader_state vs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+   ti = 2;
+
+   /*
+    * decl i0 ; Vertex pos
+    * decl i1 ; Luma texcoords
+    * decl i2 ; Chroma Cb texcoords
+    * decl i3 ; Chroma Cr texcoords
+    * decl i4 ; First ref macroblock top field texcoords
+    * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
+    * decl i6 ; Second ref macroblock top field texcoords
+    * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
+    */
+   for (i = 0; i < 8; i++) {
+      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * decl o0 ; Vertex pos
+    * decl o1 ; Luma texcoords
+    * decl o2 ; Chroma Cb texcoords
+    * decl o3 ; Chroma Cr texcoords
+    * decl o4 ; First ref macroblock texcoords
+    * decl o5 ; Second ref macroblock texcoords
+    */
+   for (i = 0; i < 6; i++) {
+      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * mov o0, i0 ; Move input vertex pos to output
+    * mov o1, i1 ; Move input luma texcoords to output
+    * mov o2, i2 ; Move input chroma Cb texcoords to output
+    * mov o3, i3 ; Move input chroma Cr texcoords to output
+    */
+   for (i = 0; i < 4; ++i) {
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
+    * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
+    */
+   for (i = 0; i < 2; ++i) {
+      /* (i + 2) * 2 selects inputs 4 and 6, the two top-field motion vectors */
+      inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   vs.tokens = tokens;
+   r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
+   free(tokens);
+}
+
+/* Placeholder: bi-directional field-prediction vertex shader is not implemented. */
+static void
+create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(false);
+}
+
+/*
+ * Builds the fragment shader for bi-directional frame prediction: blends the
+ * two reference texels with weight c1.x and adds the rescaled differential.
+ * The handle is stored in r->b_fs[0].
+ */
+static void
+create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   const unsigned max_tokens = 100;
+
+   struct pipe_shader_state fs;
+   struct tgsi_token *tokens;
+   struct tgsi_header *header;
+
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+
+   unsigned ti;
+
+   unsigned i;
+
+   assert(r);
+
+   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
+   header = (struct tgsi_header *) &tokens[0];
+   *header = tgsi_build_header();
+   *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+   ti = 2;
+
+   /*
+    * decl i0 ; Luma texcoords
+    * decl i1 ; Chroma Cb texcoords
+    * decl i2 ; Chroma Cr texcoords
+    * decl i3 ; First ref macroblock texcoords
+    * decl i4 ; Second ref macroblock texcoords
+    */
+   for (i = 0; i < 5; ++i) {
+      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+    * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+    */
+   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl o0 ; Fragment color */
+   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /* decl t0-t2 */
+   decl = vl_decl_temps(0, 2);
+   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+   /*
+    * decl s0 ; Sampler for luma texture
+    * decl s1 ; Sampler for chroma Cb texture
+    * decl s2 ; Sampler for chroma Cr texture
+    * decl s3 ; Sampler for first ref surface texture
+    * decl s4 ; Sampler for second ref surface texture
+    */
+   for (i = 0; i < 5; ++i) {
+      decl = vl_decl_samplers(i, i);
+      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /*
+    * tex2d t1, i0, s0 ; Read texel from luma texture
+    * mov t0.x, t1.x ; Move luma sample into .x component
+    * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+    * mov t0.y, t1.x ; Move Cb sample into .y component
+    * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+    * mov t0.z, t1.x ; Move Cr sample into .z component
+    */
+   for (i = 0; i < 3; ++i) {
+      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* mul t0, t0, c0 ; Rescale texel to correct range */
+   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /*
+    * tex2d t1, i3, s3 ; Read texel from first ref macroblock
+    * tex2d t2, i4, s4 ; Read texel from second ref macroblock
+    */
+   for (i = 0; i < 2; ++i) {
+      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
+      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   }
+
+   /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+   inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+   inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   /* end */
+   inst = vl_end();
+   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+   assert(ti <= max_tokens);
+
+   fs.tokens = tokens;
+   r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
+   free(tokens);
+}
+
+/* Placeholder: bi-directional field-prediction fragment shader is not implemented. */
+static void
+create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(false);
+}
+
+/*
+ * Creates a write transfer covering each of the three plane textures and
+ * maps it, leaving the mapped pointers in r->texels[].
+ */
+static void
+xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
+{
+   unsigned i;
+
+   assert(r);
+
+   for (i = 0; i < 3; ++i) {
+      r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
+      (
+         r->pipe->screen, r->textures.all[i],
+         0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
+         r->textures.all[i]->width0, r->textures.all[i]->height0
+      );
+
+      r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
+   }
+}
+
+/* Unmaps and destroys the three transfers created by xfer_buffers_map(). */
+static void
+xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
+{
+   unsigned i;
+
+   assert(r);
+
+   for (i = 0; i < 3; ++i) {
+      r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
+      r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
+   }
+}
+
+/*
+ * Fills in the viewport, scissor and framebuffer state from the picture
+ * size (rounded up to a power of two when r->pot_buffers is set) and creates
+ * the five sampler states. Always returns true.
+ * NOTE(review): scissor.minx/miny are not written here; presumably *r is
+ * zero-initialised by the caller -- confirm.
+ */
+static bool
+init_pipe_state(struct vl_mpeg12_mc_renderer *r)
+{
+   struct pipe_sampler_state sampler;
+   unsigned filters[5];
+   unsigned i;
+
+   assert(r);
+
+   r->viewport.scale[0] = r->pot_buffers ?
+      util_next_power_of_two(r->picture_width) : r->picture_width;
+   r->viewport.scale[1] = r->pot_buffers ?
+      util_next_power_of_two(r->picture_height) : r->picture_height;
+   r->viewport.scale[2] = 1;
+   r->viewport.scale[3] = 1;
+   r->viewport.translate[0] = 0;
+   r->viewport.translate[1] = 0;
+   r->viewport.translate[2] = 0;
+   r->viewport.translate[3] = 0;
+
+   r->scissor.maxx = r->pot_buffers ?
+      util_next_power_of_two(r->picture_width) : r->picture_width;
+   r->scissor.maxy = r->pot_buffers ?
+      util_next_power_of_two(r->picture_height) : r->picture_height;
+
+   r->fb_state.width = r->pot_buffers ?
+      util_next_power_of_two(r->picture_width) : r->picture_width;
+   r->fb_state.height = r->pot_buffers ?
+      util_next_power_of_two(r->picture_height) : r->picture_height;
+   r->fb_state.nr_cbufs = 1;
+   r->fb_state.zsbuf = NULL;
+
+   /* Luma filter */
+   filters[0] = PIPE_TEX_FILTER_NEAREST;
+   /* Chroma filters */
+   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
+       r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+      filters[1] = PIPE_TEX_FILTER_NEAREST;
+      filters[2] = PIPE_TEX_FILTER_NEAREST;
+   }
+   else {
+      filters[1] = PIPE_TEX_FILTER_LINEAR;
+      filters[2] = PIPE_TEX_FILTER_LINEAR;
+   }
+   /* Fwd, bkwd ref filters */
+   filters[3] = PIPE_TEX_FILTER_LINEAR;
+   filters[4] = PIPE_TEX_FILTER_LINEAR;
+
+   /*
+    * Several sampler fields are deliberately left unset below; zero the
+    * template first so the driver never sees indeterminate stack contents.
+    */
+   memset(&sampler, 0, sizeof(sampler));
+
+   for (i = 0; i < 5; ++i) {
+      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.min_img_filter = filters[i];
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = filters[i];
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      /*sampler.prefilter = ; */
+      /*sampler.shadow_ambient = ; */
+      /*sampler.lod_bias = ; */
+      sampler.min_lod = 0;
+      /*sampler.max_lod = ; */
+      /*sampler.border_color[i] = ; */
+      /*sampler.max_anisotropy = ; */
+      r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
+   }
+
+   return true;
+}
+
+/* Deletes the five sampler states created by init_pipe_state(). */
+static void
+cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
+{
+   unsigned i;
+
+   assert(r);
+
+   for (i = 0; i < 5; ++i)
+      r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+}
+
+/*
+ * Creates the intra, frame-prediction and bi-directional frame-prediction
+ * shaders; the field-prediction variants are not created. Always returns true.
+ */
+static bool
+init_shaders(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(r);
+
+   create_intra_vert_shader(r);
+   create_intra_frag_shader(r);
+   create_frame_pred_vert_shader(r);
+   create_frame_pred_frag_shader(r);
+   create_frame_bi_pred_vert_shader(r);
+   create_frame_bi_pred_frag_shader(r);
+
+   return true;
+}
+
+/* Deletes the six shader states created by init_shaders(). */
+static void
+cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
+{
+   assert(r);
+
+   r->pipe->delete_vs_state(r->pipe, r->i_vs);
+   r->pipe->delete_fs_state(r->pipe, r->i_fs);
+   r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
+   r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
+   r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
+   r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
+}
+
+/*
+ * Allocates the macroblock batch array, the three plane textures, the three
+ * vertex buffers and the two constant buffers, describes the eight vertex
+ * elements and uploads fs_consts. Always returns true.
+ * NOTE(review): none of the allocations made here are checked for failure.
+ */
+static bool
+init_buffers(struct vl_mpeg12_mc_renderer *r)
+{
+   struct pipe_texture template;
+
+   const unsigned mbw =
+      align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
+   const unsigned mbh =
+      align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
+
+   unsigned i;
+
+   assert(r);
+
+   /* A batch is either the whole picture or a single row of macroblocks */
+   r->macroblocks_per_batch =
+      mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
+   r->num_macroblocks = 0;
+   r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
+
+   memset(&template, 0, sizeof(struct pipe_texture));
+   template.target = PIPE_TEXTURE_2D;
+   /* TODO: Accommodate HW that can't do this and also for cases when this isn't precise enough */
+   template.format = PIPE_FORMAT_R16_SNORM;
+   template.last_level = 0;
+   template.width0 = r->pot_buffers ?
+      util_next_power_of_two(r->picture_width) : r->picture_width;
+   template.height0 = r->pot_buffers ?
+      util_next_power_of_two(r->picture_height) : r->picture_height;
+   template.depth0 = 1;
+   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
+
+   r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
+
+   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      template.width0 = r->pot_buffers ?
+         util_next_power_of_two(r->picture_width / 2) :
+         r->picture_width / 2;
+      template.height0 = r->pot_buffers ?
+         util_next_power_of_two(r->picture_height / 2) :
+         r->picture_height / 2;
+   }
+   /*
+    * NOTE(review): 4:2:2 chroma is normally half width at full height, but
+    * this branch halves the height -- confirm which is intended.
+    */
+   else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
+      template.height0 = r->pot_buffers ?
+         util_next_power_of_two(r->picture_height / 2) :
+         r->picture_height / 2;
+
+   r->textures.individual.cb =
+      r->pipe->screen->texture_create(r->pipe->screen, &template);
+   r->textures.individual.cr =
+      r->pipe->screen->texture_create(r->pipe->screen, &template);
+
+   /* 24 vertices per macroblock: four blocks of two triangles each */
+   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
+   r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
+   r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      DEFAULT_BUF_ALIGNMENT,
+      PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+      sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
+   );
+
+   for (i = 1; i < 3; ++i) {
+      r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
+      r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.all[i].buffer_offset = 0;
+      r->vertex_bufs.all[i].buffer = pipe_buffer_create
+      (
+         r->pipe->screen,
+         DEFAULT_BUF_ALIGNMENT,
+         PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+         sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
+      );
+   }
+
+   /* Position element */
+   r->vertex_elems[0].src_offset = 0;
+   r->vertex_elems[0].vertex_buffer_index = 0;
+   r->vertex_elems[0].nr_components = 2;
+   r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Luma, texcoord element */
+   r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   r->vertex_elems[1].vertex_buffer_index = 0;
+   r->vertex_elems[1].nr_components = 2;
+   r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cb texcoord element (cb_tc is the third member of vert_stream_0) */
+   r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   r->vertex_elems[2].vertex_buffer_index = 0;
+   r->vertex_elems[2].nr_components = 2;
+   r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cr texcoord element (cr_tc is the fourth member of vert_stream_0) */
+   r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   r->vertex_elems[3].vertex_buffer_index = 0;
+   r->vertex_elems[3].nr_components = 2;
+   r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface top field texcoord element */
+   r->vertex_elems[4].src_offset = 0;
+   r->vertex_elems[4].vertex_buffer_index = 1;
+   r->vertex_elems[4].nr_components = 2;
+   r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface bottom field texcoord element */
+   r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   r->vertex_elems[5].vertex_buffer_index = 1;
+   r->vertex_elems[5].nr_components = 2;
+   r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface top field texcoord element */
+   r->vertex_elems[6].src_offset = 0;
+   r->vertex_elems[6].vertex_buffer_index = 2;
+   r->vertex_elems[6].nr_components = 2;
+   r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface bottom field texcoord element */
+   r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   r->vertex_elems[7].vertex_buffer_index = 2;
+   r->vertex_elems[7].nr_components = 2;
+   r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   r->vs_const_buf.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      DEFAULT_BUF_ALIGNMENT,
+      PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+      sizeof(struct vertex_shader_consts)
+   );
+
+   r->fs_const_buf.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      DEFAULT_BUF_ALIGNMENT,
+      PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
+   );
+
+   /* The fragment constants never change, so upload them once here */
+   memcpy
+   (
+      pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+      &fs_consts, sizeof(struct fragment_shader_consts)
+   );
+
+   pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
+
+   return true;
+}
+
+/* Drops the references taken in init_buffers() and frees the macroblock array. */
+static void
+cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
+{
+   unsigned i;
+
+   assert(r);
+
+   pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
+   pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
+
+   for (i = 0; i < 3; ++i)
+      pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
+
+   for (i = 0; i < 3; ++i)
+      pipe_texture_reference(&r->textures.all[i], NULL);
+
+   FREE(r->macroblock_buf);
+}
+
+/*
+ * Maps a macroblock's (mb_type, mo_type) pair to a MACROBLOCK_TYPE. Any
+ * motion type other than FRAME selects the FIELD variant. Asserts on an
+ * unknown mb_type.
+ */
+static enum MACROBLOCK_TYPE
+get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
+{
+   assert(mb);
+
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+         return MACROBLOCK_TYPE_INTRA;
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+            MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+            MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
+            MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
+      default:
+         assert(0);
+   }
+
+   /* Unreachable */
+   return -1;
+}
+
+/*
+ * Writes the six vertices (two triangles) of one block into vb[0..5]:
+ * positions always, and for each plane either the block's own texcoords or,
+ * when use_zb is set and the plane's bit (lm/cbm/crm) is clear in cbp, the
+ * texcoords of the shared zero block zb[plane].
+ */
+/* XXX: One of these days this will have to be killed with fire */
+#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
+   do { \
+      (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
+      (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+      (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
+      (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
+      (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+      (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+      \
+      if (!(use_zb) || (cbp) & (lm)) \
+      { \
+         (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+      } \
+      else \
+      { \
+         (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
+         (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
+         (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
+         (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
+         (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
+         (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
+      } \
+      \
+      if (!(use_zb) || (cbp) & (cbm)) \
+      { \
+         (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+      } \
+      else \
+      { \
+         (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
+         (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
+         (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
+         (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
+         (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
+         (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
+      } \
+      \
+      if (!(use_zb) || (cbp) & (crm)) \
+      { \
+         (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
+         (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+         (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+      } \
+      else \
+      { \
+         (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
+         (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
+         (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
+         (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
+         (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
+         (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
+      } \
+   } while (0)
+
+/*
+ * Writes the vertex data for macroblock mb into slot pos of the mapped
+ * vertex buffers: motion vectors into ref_vb[1] (BI only) and ref_vb[0]
+ * (FWD/BKWD/BI), then the 24 position/texcoord vertices into ycbcr_vb.
+ * The switch cases fall through on purpose so each type also does the work
+ * of the simpler types below it.
+ */
+static void
+gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
+                     struct pipe_mpeg12_macroblock *mb, unsigned pos,
+                     struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
+{
+   struct vertex2f mo_vec[2];
+
+   unsigned i;
+
+   assert(r);
+   assert(mb);
+   assert(ycbcr_vb);
+   assert(pos < r->macroblocks_per_batch);
+
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+      {
+         struct vertex2f *vb;
+
+         assert(ref_vb && ref_vb[1]);
+
+         /* Two vertex2f entries (top/bottom field) per vertex, 24 vertices */
+         vb = ref_vb[1] + pos * 2 * 24;
+
+         mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
+         mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+            for (i = 0; i < 24 * 2; i += 2) {
+               vb[i].x = mo_vec[0].x;
+               vb[i].y = mo_vec[0].y;
+            }
+         }
+         else {
+            mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
+            mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+            for (i = 0; i < 24 * 2; i += 2) {
+               vb[i].x = mo_vec[0].x;
+               vb[i].y = mo_vec[0].y;
+               vb[i + 1].x = mo_vec[1].x;
+               vb[i + 1].y = mo_vec[1].y;
+            }
+         }
+
+         /* fall-through */
+      }
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+      {
+         struct vertex2f *vb;
+
+         assert(ref_vb && ref_vb[0]);
+
+         vb = ref_vb[0] + pos * 2 * 24;
+
+         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+            mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
+            mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+               mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
+               mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+            }
+         }
+         else {
+            mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
+            mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+               mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
+               mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
+            }
+         }
+
+         /*
+          * Select on the motion type, not the macroblock type: mo_vec[1] is
+          * only filled in above for field motion, so the two-vector path
+          * must not run for frame motion.
+          */
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+            for (i = 0; i < 24 * 2; i += 2) {
+               vb[i].x = mo_vec[0].x;
+               vb[i].y = mo_vec[0].y;
+            }
+         }
+         else {
+            for (i = 0; i < 24 * 2; i += 2) {
+               vb[i].x = mo_vec[0].x;
+               vb[i].y = mo_vec[0].y;
+               vb[i + 1].x = mo_vec[1].x;
+               vb[i + 1].y = mo_vec[1].y;
+            }
+         }
+
+         /* fall-through */
+      }
+      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+      {
+         const struct vertex2f unit =
+         {
+            r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
+            r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
+         };
+         const struct vertex2f half =
+         {
+            r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
+            r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
+         };
+         const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
+
+         struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
+
+         /* One SET_BLOCK per quadrant; 32/16/8/4 are the luma cbp bits */
+         SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
+                   unit.x, unit.y, 0, 0, half.x, half.y,
+                   32, 2, 1, use_zb, r->zero_block);
+
+         SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
+                   unit.x, unit.y, half.x, 0, half.x, half.y,
+                   16, 2, 1, use_zb, r->zero_block);
+
+         SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
+                   unit.x, unit.y, 0, half.y, half.x, half.y,
+                   8, 2, 1, use_zb, r->zero_block);
+
+         SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
+                   unit.x, unit.y, half.x, half.y, half.x, half.y,
+                   4, 2, 1, use_zb, r->zero_block);
+
+         break;
+      }
+      default:
+         assert(0);
+   }
+}
+
+/*
+ * Counts the buffered macroblocks per MACROBLOCK_TYPE into num_macroblocks
+ * (which the caller must have zeroed) and writes their vertices into the
+ * mapped vertex buffers, grouped by type in enum order.
+ */
+static void
+gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
+                      unsigned *num_macroblocks)
+{
+   unsigned offset[NUM_MACROBLOCK_TYPES];
+   struct vert_stream_0 *ycbcr_vb;
+   struct vertex2f *ref_vb[2];
+   unsigned i;
+
+   assert(r);
+   assert(num_macroblocks);
+
+   for (i = 0; i < r->num_macroblocks; ++i) {
+      enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+      ++num_macroblocks[mb_type];
+   }
+
+   /* Prefix sum: offset[t] is the first slot used by macroblocks of type t */
+   offset[0] = 0;
+
+   for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
+      offset[i] = offset[i - 1] + num_macroblocks[i - 1];
+
+   ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
+   (
+      r->pipe->screen,
+      r->vertex_bufs.individual.ycbcr.buffer,
+      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+   );
+
+   for (i = 0; i < 2; ++i)
+      ref_vb[i] = (struct vertex2f *)pipe_buffer_map
+      (
+         r->pipe->screen,
+         r->vertex_bufs.individual.ref[i].buffer,
+         PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+      );
+
+   for (i = 0; i < r->num_macroblocks; ++i) {
+      enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+
+      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
+                           ycbcr_vb, ref_vb);
+
+      ++offset[mb_type];
+   }
+
+   pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
+   for (i = 0; i < 2; ++i)
+      pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
+}
+
+static void
+flush(struct vl_mpeg12_mc_renderer *r)
+{
+   unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
+   unsigned vb_start = 0;
+   struct vertex_shader_consts *vs_consts;
+   unsigned i;
+
+   assert(r);
+   assert(r->num_macroblocks == r->macroblocks_per_batch);
+
+   gen_macroblock_stream(r, num_macroblocks);
+
+   r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
+   (
+      r->pipe->screen, r->surface,
+      0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
+   );
+
+   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
+   r->pipe->set_viewport_state(r->pipe, &r->viewport);
+   r->pipe->set_scissor_state(r->pipe, &r->scissor);
+
+   vs_consts = pipe_buffer_map
+   (
+      r->pipe->screen, r->vs_const_buf.buffer,
+      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+   );
+
+   vs_consts->denorm.x = r->surface->width0;
+   vs_consts->denorm.y = r->surface->height0;
+
+   pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
+
+   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
+                                &r->vs_const_buf);
+   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
+                                &r->fs_const_buf);
+
+   if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
+      r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
+      r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+      r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
+      r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
+      r->pipe->bind_vs_state(r->pipe, r->i_vs);
+      r->pipe->bind_fs_state(r->pipe, r->i_fs);
+
+      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                           num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
+   }
+
+   if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
+      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->textures.individual.ref[0] = r->past;
+      r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+      r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
+      r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
+
r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, 
r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24; + } + + r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); + pipe_surface_reference(&r->fb_state.cbufs[0], NULL); + + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + for (i = 0; i < 3; ++i) + r->zero_block[i].x = ZERO_BLOCK_NIL; + + r->num_macroblocks = 0; +} + +static void +grab_frame_coded_block(short 
*src, short *dst, unsigned dst_pitch) +{ + unsigned y; + + assert(src); + assert(dst); + + for (y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +grab_field_coded_block(short *src, short *dst, unsigned dst_pitch) +{ + unsigned y; + + assert(src); + assert(dst); + + for (y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +fill_zero_block(short *dst, unsigned dst_pitch) +{ + unsigned y; + + assert(dst); + + for (y = 0; y < BLOCK_HEIGHT; ++y) + memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2); +} + +static void +grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, + enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) +{ + unsigned tex_pitch; + short *texels; + unsigned tb = 0, sb = 0; + unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT; + unsigned x, y; + + assert(r); + assert(blocks); + + tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format); + texels = r->texels[0] + mbpy * tex_pitch + mbpx; + + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x, ++tb) { + if ((cbp >> (5 - tb)) & 1) { + if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch * BLOCK_WIDTH + + x * BLOCK_WIDTH, tex_pitch); + } + else { + grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch + x * BLOCK_WIDTH, + tex_pitch); + } + + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[0])) { + fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { + r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x; + 
r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y; + } + } + } + } + } + + /* TODO: Implement 422, 444 */ + assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); + + mbpx /= 2; + mbpy /= 2; + + for (tb = 0; tb < 2; ++tb) { + tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format); + texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; + + if ((cbp >> (1 - tb)) & 1) { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) { + fill_zero_block(texels, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { + r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x; + r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y; + } + } + } + } +} + +static void +grab_macroblock(struct vl_mpeg12_mc_renderer *r, + struct pipe_mpeg12_macroblock *mb) +{ + assert(r); + assert(mb); + assert(r->num_macroblocks < r->macroblocks_per_batch); + + memcpy(&r->macroblock_buf[r->num_macroblocks], mb, + sizeof(struct pipe_mpeg12_macroblock)); + + grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks); + + ++r->num_macroblocks; +} + +bool +vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers) +{ + unsigned i; + + assert(renderer); + assert(pipe); + /* TODO: Implement other policies */ + assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE); + /* TODO: Implement this */ + /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */ + assert(eb_handling != 
VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE); + /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */ + assert(pot_buffers); + + memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer)); + + renderer->pipe = pipe; + renderer->picture_width = picture_width; + renderer->picture_height = picture_height; + renderer->chroma_format = chroma_format; + renderer->bufmode = bufmode; + renderer->eb_handling = eb_handling; + renderer->pot_buffers = pot_buffers; + + if (!init_pipe_state(renderer)) + return false; + if (!init_shaders(renderer)) { + cleanup_pipe_state(renderer); + return false; + } + if (!init_buffers(renderer)) { + cleanup_shaders(renderer); + cleanup_pipe_state(renderer); + return false; + } + + renderer->surface = NULL; + renderer->past = NULL; + renderer->future = NULL; + for (i = 0; i < 3; ++i) + renderer->zero_block[i].x = ZERO_BLOCK_NIL; + renderer->num_macroblocks = 0; + + xfer_buffers_map(renderer); + + return true; +} + +void +vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) +{ + assert(renderer); + + xfer_buffers_unmap(renderer); + + cleanup_pipe_state(renderer); + cleanup_shaders(renderer); + cleanup_buffers(renderer); +} + +void +vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer + *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock + *mpeg12_macroblocks, + struct pipe_fence_handle **fence) +{ + bool new_surface = false; + + assert(renderer); + assert(surface); + assert(num_macroblocks); + assert(mpeg12_macroblocks); + + if (renderer->surface) { + if (surface != renderer->surface) { + if (renderer->num_macroblocks > 0) { + xfer_buffers_unmap(renderer); + flush(renderer); + } + + new_surface = true; + } + + /* If the surface we're rendering hasn't changed the ref frames shouldn't change. 
*/ + assert(surface != renderer->surface || renderer->past == past); + assert(surface != renderer->surface || renderer->future == future); + } + else + new_surface = true; + + if (new_surface) { + renderer->surface = surface; + renderer->past = past; + renderer->future = future; + renderer->fence = fence; + renderer->surface_tex_inv_size.x = 1.0f / surface->width0; + renderer->surface_tex_inv_size.y = 1.0f / surface->height0; + } + + while (num_macroblocks) { + unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; + unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); + unsigned i; + + for (i = 0; i < num_to_submit; ++i) { + assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); + grab_macroblock(renderer, &mpeg12_macroblocks[i]); + } + + num_macroblocks -= num_to_submit; + mpeg12_macroblocks += num_to_submit; /* step past the entries just queued so the next batch does not resubmit them */ + if (renderer->num_macroblocks == renderer->macroblocks_per_batch) { + xfer_buffers_unmap(renderer); + flush(renderer); + xfer_buffers_map(renderer); + /* Next time we get this surface it may have new ref frames */ + renderer->surface = NULL; + } + } +} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h new file mode 100644 index 0000000000..64184337a0 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef vl_mpeg12_mc_renderer_h +#define vl_mpeg12_mc_renderer_h + +#include <pipe/p_compiler.h> +#include <pipe/p_state.h> +#include <pipe/p_video_state.h> + +struct pipe_context; +struct pipe_video_surface; +struct pipe_macroblock; + +/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */ +enum VL_MPEG12_MC_RENDERER_BUFFER_MODE +{ + VL_MPEG12_MC_RENDERER_BUFFER_SLICE, /* Saves memory at the cost of smaller batches */ + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */ +}; + +enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK +{ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */ +}; + +struct vl_mpeg12_mc_renderer +{ + struct pipe_context *pipe; + unsigned picture_width; + unsigned picture_height; + enum pipe_video_chroma_format chroma_format; + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode; + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling; + bool pot_buffers; + unsigned macroblocks_per_batch; + + struct pipe_viewport_state viewport; + struct pipe_scissor_state scissor; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; + struct pipe_framebuffer_state fb_state; + struct pipe_vertex_element vertex_elems[8]; + + union + { + void *all[5]; + struct { void *y, *cb, *cr, *ref[2]; } individual; + } samplers; + + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + + union + { + struct pipe_texture *all[5]; + struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual; + } vertex_bufs; + + struct pipe_texture *surface, 
*past, *future; + struct pipe_fence_handle **fence; + unsigned num_macroblocks; + struct pipe_mpeg12_macroblock *macroblock_buf; + struct pipe_transfer *tex_transfer[3]; + short *texels[3]; + struct { float x, y; } surface_tex_inv_size; + struct { float x, y; } zero_block[3]; +}; + +bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers); + +void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer); + +void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock *mpeg12_macroblocks, + struct pipe_fence_handle **fence); + +#endif /* vl_mpeg12_mc_renderer_h */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c new file mode 100644 index 0000000000..d011ef97bd --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -0,0 +1,243 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "vl_shader_build.h" +#include <assert.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_build.h> + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ /* Input-file declaration carrying a semantic name/index, the given interpolation mode and the register range first..last. */ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Declaration.Interpolate = interpolation; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = name; + decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = name; + 
decl.Semantic.Index = index; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Range.First = first; + decl.Range.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.Src[0].Register.File = src_file; + inst.Src[0].Register.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; + + return inst; +} + +struct 
tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.Instruction.Texture = 1; + inst.Texture.Texture = tex; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; + inst.Src[2].Register.File = src3_file; + inst.Src[2].Register.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h new file mode 100644 index 0000000000..5da71f8e13 --- 
/dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.h @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include <pipe/p_shader_tokens.h> + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif |