diff options
Diffstat (limited to 'src')
119 files changed, 7597 insertions, 3124 deletions
diff --git a/src/gallium/Makefile b/src/gallium/Makefile index 568747e157..aa77021daf 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -19,3 +19,7 @@ subdirs: clean: rm -f `find . -name \*.[oa]` rm -f `find . -name depend` + + +# Dummy install target +install: diff --git a/src/gallium/README.portability b/src/gallium/README.portability index c70ca774da..ab0c197847 100644 --- a/src/gallium/README.portability +++ b/src/gallium/README.portability @@ -39,5 +39,6 @@ portable way. == Debugging == -TODO +* Use the functions/macros in p_debug.h. +* Don't include assert.h, call abort, printf, etc. diff --git a/src/gallium/SConscript b/src/gallium/SConscript index fa4833cbcf..f09778ce99 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -9,6 +9,9 @@ auxiliaries = [] Export('auxiliaries') +if llvm: + SConscript(['auxiliary/gallivm/SConscript']) + SConscript([ # NOTE: order matters! 'auxiliary/util/SConscript', @@ -19,8 +22,5 @@ SConscript([ 'auxiliary/pipebuffer/SConscript', ]) -if llvm: - SConscript(['auxiliary/gallivm/SConscript']) - for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 3e49266163..6bd6602088 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = cso_cache C_SOURCES = \ + cso_context.c \ cso_cache.c \ cso_hash.c diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript index 9751881613..651e68a191 100644 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -3,6 +3,7 @@ Import('*') cso_cache = env.ConvenienceLibrary( target = 'cso_cache', source = [ + 'cso_context.c', 'cso_cache.c', 'cso_hash.c', ]) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index b427b509f8..a2764b4265 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -207,8 +207,11 @@ static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type { /* if we're approach the maximum size, remove fourth of the entries * otherwise every subsequent call will go through the same */ - int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash); + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; int to_remove = (max_size < max_entries) * max_entries/4; + if (hash_size > max_size) + to_remove += hash_size - max_size; while (to_remove) { /*remove elements until we're good */ /*fixme: currently we pick the nodes to remove at random*/ diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 44ee128a4a..e5edbbb556 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -84,47 +84,49 @@ extern "C" { #endif +typedef void (*cso_state_callback)(void *ctx, void *obj); + struct cso_cache; struct cso_blend { struct pipe_blend_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; @@ -138,8 +140,6 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; -typedef void (*cso_state_callback)(void *, void *); - unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c new file mode 100644 index 0000000000..f7f4aebb16 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -0,0 +1,354 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. + * + * Authors: + * Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *blend; + void *depth_stencil; + void *rasterizer; + void *fragment_shader; + void *vertex_shader; +}; + + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + + ctx->pipe = pipe; + + /* Enable for testing: */ + if (0) cso_set_maximum_cache_size( ctx->cache, 4 ); + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + +static void cso_release_all( struct cso_context *ctx ) +{ + if (ctx->pipe) { + ctx->pipe->bind_blend_state( ctx->pipe, NULL ); + ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); + ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); + ctx->pipe->bind_fs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + } + + if (ctx->cache) { + cso_cache_delete( ctx->cache ); + ctx->cache = NULL; + } +} + + +void cso_destroy_context( struct cso_context *ctx ) +{ + debug_printf("%s\n", __FUNCTION__); + + if (ctx) + cso_release_all( ctx ); + + FREE( ctx ); +} + + +/* Those function will either find the state of the given template + * in the cache or they will create a new state from the given + * template, insert it in the cache and return it. + */ + +/* + * If the driver returns 0 from the create method then they will assign + * the data member of the cso to be the template itself. + */ + +void cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_BLEND, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + + cso->state = *templ; + cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + handle = cso->data; + } + else { + handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; + } + + if (ctx->blend != handle) { + ctx->blend = handle; + ctx->pipe->bind_blend_state(ctx->pipe, handle); + } +} + +void cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + + cso->state = *templ; + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->samplers[idx] = handle; +} + +void cso_single_sampler_done( struct cso_context *ctx ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + if (ctx->samplers[i] == NULL) + break; + + ctx->nr_samplers = i; + + if (ctx->hw.nr_samplers != ctx->nr_samplers || + memcmp(ctx->hw.samplers, + ctx->samplers, + ctx->nr_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); + ctx->hw.nr_samplers = ctx->nr_samplers; + + ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) +{ + unsigned i; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) + cso_single_sampler( ctx, i, templates[i] ); + + for ( ; i < ctx->nr_samplers; i++) + cso_single_sampler( ctx, i, NULL ); + + cso_single_sampler_done( ctx ); +} + +void cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_depth_stencil_alpha_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + + cso->state = *templ; + cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + handle = cso->data; + } + else { + handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; + } + + if (ctx->depth_stencil != handle) { + ctx->depth_stencil = handle; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + } +} + + + +void cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_rasterizer_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_RASTERIZER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + + cso->state = *templ; + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } +} + + + + + +void cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_FRAGMENT_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + +void cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_VERTEX_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h new file mode 100644 index 0000000000..1f2a630804 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CSO_CONTEXT_H +#define CSO_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_context; + +struct cso_context *cso_create_context( struct pipe_context *pipe ); + +void cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); + +void cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); + +void cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); + +void cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); + +/* Alternate interface to support state trackers that like to modify + * samplers one at a time: + */ +void cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); + +void cso_single_sampler_done( struct cso_context *cso ); + + +/* These aren't really sensible -- most of the time the api provides + * object semantics for shaders anyway, and the cases where it doesn't + * (eg mesa's internall-generated texenv programs), it will be up to + * the state tracker to implement their own specialized caching. + */ +void cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_destroy_context( struct cso_context *cso ); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index b3b4d667d2..5cad5d3be7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -101,13 +101,6 @@ static void *cso_data_allocate_node(struct cso_hash_data *hash) static void cso_data_free_node(struct cso_node *node) { - /* XXX still a leak here. - * Need to cast value ptr to original cso type, then free the - * driver-specific data hanging off of it. For example: - struct cso_sampler *csamp = (struct cso_sampler *) node->value; - FREE(csamp->data); - */ - FREE(node->value); FREE(node); } diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index d5bca9d591..84b45a5963 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -25,6 +25,16 @@ * **************************************************************************/ +/** + This file provides a hash implementation that is capable of dealing + with collisions. It stores colliding entries in linked list. All + functions operating on the hash return an iterator. The iterator + itself points to the collision list. If there wasn't any collision + the list will have just one entry, otherwise client code should + iterate over the entries to find the exact entry among ones that + had the same key (e.g. memcmp could be used on the data to check + that) +*/ /* * Authors: * Zack Rusin <zack@tungstengraphics.com> diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index c18dcb2927..5cb7664c85 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -16,6 +16,7 @@ draw = env.ConvenienceLibrary( 'draw_offset.c', 'draw_prim.c', 'draw_pstipple.c', + 'draw_passthrough.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 7660e56fe6..6b1e640ae9 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -78,7 +78,8 @@ struct aaline_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* @@ -98,11 +99,10 @@ struct aaline_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); - - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); struct pipe_context *pipe; }; @@ -607,6 +607,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) auto struct aaline_stage *aaline = aaline_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = aaline->pipe; + uint num = MAX2(aaline->num_textures, aaline->num_samplers); assert(draw->rasterizer->line_smooth); @@ -624,8 +625,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ bind_aaline_fragment_shader(aaline); - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + aaline->state.sampler[num] = aaline->sampler_cso; + aaline->state.texture[num] = aaline->texture; + + aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); /* now really draw first line */ stage->line = aaline_line; @@ -647,10 +651,10 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); /* XXX restore original texture, sampler state */ - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, - aaline->state.sampler[aaline->sampler_unit]); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, - aaline->state.texture[aaline->sampler_unit]); + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); draw->extra_vp_outputs.slot = 0; } @@ -729,7 +733,8 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aaline->fs = aafs; /* pass-through */ - aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? aafs->driver_fs : NULL)); } @@ -745,26 +750,28 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs) static void -aaline_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.sampler[unit] = sampler; + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; /* pass-through */ - aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } static void -aaline_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.texture[sampler] = texture; + memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *)); + aaline->num_textures = num; /* pass-through */ - aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); } @@ -798,14 +805,14 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_state = pipe->bind_sampler_state; - aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_state = aaline_bind_sampler_state; - pipe->set_sampler_texture = aaline_set_sampler_texture; + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 70f696475f..99e9e9fe34 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -800,7 +800,8 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aapoint->fs = aafs; /* pass-through */ - aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 428b6209e0..fed2b6e759 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,6 +34,7 @@ #include "pipe/p_util.h" #include "draw_context.h" #include "draw_private.h" +#include "draw_vbuf.h" @@ -114,6 +115,13 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + + /* Not so fast -- we're just borrowing this at the moment. + * + if (draw->render) + draw->render->destroy( draw->render ); + */ + FREE( draw ); } @@ -349,3 +357,10 @@ void draw_reset_vertex_ids(struct draw_context *draw) draw_vertex_cache_reset_vertex_ids(draw); } + + +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) +{ + draw->render = render; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ab87b4127c..df63e91a22 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -168,4 +168,9 @@ unsigned draw_trim_prim( unsigned mode, unsigned count ); + +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c new file mode 100644 index 0000000000..a51fa0ab23 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +/* This code is a prototype of what a passhthrough vertex shader might + * look like. + * + * Probably the best approach for us is to do: + * - vertex fetch + * - vertex shader + * - cliptest / viewport transform + * + * in one step, then examine the clipOrMask & choose between two paths: + * + * Either: + * - build primitive headers + * - clip and the primitive path + * - build clipped vertex buffers, + * - vertex-emit to vbuf buffers + * + * Or, if no clipping: + * - vertex-emit directly to vbuf buffers + * + * But when bypass clipping is enabled, we just take the latter + * choice. If (some new) passthrough-vertex-shader flag is also set, + * the pipeline degenerates to: + * + * - vertex fetch + * - vertex emit to vbuf buffers + * + * Which is what is prototyped here. + */ +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" + + + +/* Example of a fetch/emit passthrough shader which could be + * generated when bypass_clipping is enabled on a passthrough vertex + * shader. + */ +static void fetch_xyz_rgb_st( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + unsigned i; + + const ubyte *xyzw = src[0] + start * pitch[0]; + const ubyte *rgba = src[1] + start * pitch[1]; + const ubyte *st = src[2] + start * pitch[2]; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyzw; xyzw += pitch[0]; + /* decode input, encode output. Assume both are float[4] */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + } + + { + const float *in = (const float *)rgba; rgba += pitch[1]; + /* decode input, encode output. Assume both are float[4] */ + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = in[3]; + } + + { + const float *in = (const float *)st; st += pitch[2]; + /* decode input, encode output. Assume both are float[2] */ + out[8] = in[0]; + out[9] = in[1]; + } + + out += 10; + } +} + + +static boolean update_shader( struct draw_context *draw ) +{ + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + + unsigned nr_attrs = vinfo->num_attribs; + unsigned i; + + for (i = 0; i < nr_attrs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + + draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset; + + draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; + draw->vertex_fetch.fetch[i] = NULL; + } + + draw->vertex_fetch.nr_attrs = nr_attrs; + draw->vertex_fetch.fetch_func = NULL; + draw->vertex_fetch.pt_fetch = NULL; + + draw->pt.hw_vertex_size = vinfo->size * 4; + + /* Just trying to figure out how this would work: + */ + if (nr_attrs == 3 && + 0 /* some other tests */) + { + draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; + assert(vinfo->size == 10); + return TRUE; + } + + return FALSE; +} + + + +static boolean set_prim( struct draw_context *draw, + unsigned prim ) +{ + assert(!draw->user.elts); + + draw->pt.prim = prim; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return FALSE; + default: + draw->render->set_primitive( draw->render, prim ); + return TRUE; + } +} + + + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + float *hw_verts; + + if (!set_prim(draw, prim)) + return FALSE; + + if (!update_shader( draw )) + return FALSE; + + hw_verts = draw->render->allocate_vertices( draw->render, + draw->pt.hw_vertex_size, + count ); + + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done on hardware. + */ + draw->vertex_fetch.pt_fetch( draw, + hw_verts, + start, count ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + start, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + draw->pt.hw_vertex_size, + count ); + + return TRUE; +} + diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c732d723a7..4147472d45 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -133,7 +133,7 @@ struct draw_vertex_shader { /* This member will disappear shortly: */ - const struct pipe_shader_state *state; + struct pipe_shader_state state; struct tgsi_shader_info info; @@ -162,8 +162,14 @@ typedef void (*full_fetch_func)( struct draw_context *draw, const unsigned *elts, unsigned count ); +typedef void (*pt_fetch_func)( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ); +struct vbuf_render; + /** * Private context for the drawing module. */ @@ -191,6 +197,17 @@ struct draw_context struct draw_stage *rasterize; } pipeline; + + struct vbuf_render *render; + + /* Support prototype passthrough path: + */ + struct { + unsigned prim; + unsigned hw_vertex_size; + } pt; + + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; @@ -244,6 +261,7 @@ struct draw_context fetch_func fetch[PIPE_ATTRIB_MAX]; unsigned nr_attrs; full_fetch_func fetch_func; + pt_fetch_func pt_fetch; } vertex_fetch; /* Post-tnl vertex cache: @@ -331,6 +349,15 @@ struct tgsi_exec_machine; extern void draw_update_vertex_fetch( struct draw_context *draw ); +/* Prototype/hack + */ +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count); + + #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ #define DRAW_FLUSH_PRIM_QUEUE 0x2 #define DRAW_FLUSH_VERTEX_CACHE 0x4 diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 2cfeb813b3..8b3e84a9a0 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -67,16 +67,18 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; - struct pipe_texture *texture; uint sampler_unit; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; /* * Currently bound state */ struct pstip_fragment_shader *fs; struct { - void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + void *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; const struct pipe_poly_stipple *stipple; } state; @@ -88,11 +90,10 @@ struct pstip_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -484,18 +485,25 @@ static void pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); - struct draw_context *draw = stage->draw; struct pipe_context *pipe = pstip->pipe; + uint num_samplers; - assert(draw->rasterizer->poly_stipple_enable); + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); - /* - * Bind our fragprog, sampler and texture - */ + assert(stage->draw->rasterizer->poly_stipple_enable); + + /* bind our fragprog */ bind_pstip_fragment_shader(pstip); - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + /* plug in our sampler, texture */ + pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; + pstip->state.textures[pstip->sampler_unit] = pstip->texture; + + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); /* now really draw first line */ stage->tri = passthrough_tri; @@ -517,10 +525,10 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); /* XXX restore original texture, sampler state */ - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, - pstip->state.sampler[pstip->sampler_unit]); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, - pstip->state.texture[pstip->sampler_unit]); + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.textures); } @@ -597,7 +605,8 @@ pstip_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ pstip->fs = aafs; /* pass-through */ - pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? aafs->driver_fs : NULL)); } @@ -613,26 +622,28 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs) static void -pstip_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.sampler[unit] = sampler; + memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); + pstip->num_samplers = num; /* pass-through */ - pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); } static void -pstip_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.texture[sampler] = texture; + memcpy(pstip->state.textures, texture, num * sizeof(struct pipe_texture *)); + pstip->num_textures = num; /* pass-through */ - pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); } @@ -682,8 +693,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_state = pipe->bind_sampler_state; - pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -691,7 +702,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_state = pstip_bind_sampler_state; - pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; } diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 4d718d514c..b07860cd9e 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -129,7 +129,7 @@ static void unfilled_tri( struct draw_stage *stage, points( stage, header ); break; default: - abort(); + assert(0); } } diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index cfd2b9820c..5e7de905c1 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -85,6 +85,12 @@ struct vbuf_render { const ushort *indices, uint nr_indices ); + /* Draw Arrays path too. + */ + void (*draw_arrays)( struct vbuf_render *, + unsigned start, + uint nr ); + /** * Called when vbuf is done with this set of vertices: */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 53f8bbec44..161b247d4e 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,11 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); -// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); + /* There's an error somewhere in the vcache code that requires this + * memset. The bug is exposed in q3demo demo001, but probably + * elsewhere as well. Will track it down later. + */ + memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index cb8cdd04a3..b56d85396d 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -54,7 +54,7 @@ fetch_##NAME(const void *ptr, float *attrib) \ int i; \ \ for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT; \ + attrib[i] = CVT(i); \ } \ \ for (; i < 4; i++) { \ @@ -62,24 +62,24 @@ fetch_##NAME(const void *ptr, float *attrib) \ } \ } -#define CVT_64_FLOAT (float) ((double *) ptr)[i] -#define CVT_32_FLOAT ((float *) ptr)[i] +#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] +#define CVT_32_FLOAT(i) ((float *) ptr)[i] -#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] +#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] -#define CVT_8_SSCALED (float) ((char *) ptr)[i] -#define CVT_16_SSCALED (float) ((short *) ptr)[i] -#define CVT_32_SSCALED (float) ((int *) ptr)[i] +#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] +#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] +#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] -#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f +#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f -#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f +#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) @@ -156,6 +156,16 @@ FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = CVT_8_UNORM(0); + attrib[1] = CVT_8_UNORM(1); + attrib[0] = CVT_8_UNORM(2); + attrib[3] = CVT_8_UNORM(3); +} + + static fetch_func get_fetch_func( enum pipe_format format ) { #if 0 @@ -296,6 +306,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) case PIPE_FORMAT_A8R8G8B8_UNORM: return fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return fetch_B8G8R8A8_UNORM; + case 0: return NULL; /* not sure why this is needed */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 1e95355555..133418baca 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -110,13 +110,20 @@ draw_bind_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + if (dvs) + { + draw->vertex_shader = dvs; + draw->num_vs_outputs = dvs->info.num_outputs; - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; + tgsi_exec_machine_init(&draw->machine); - tgsi_exec_machine_init(&draw->machine); - - dvs->prepare( dvs, draw ); + dvs->prepare( dvs, draw ); + } + else { + draw->vertex_shader = NULL; + draw->num_vs_outputs = 0; + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 583812aadd..55bec14116 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -71,7 +71,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { /* specify the vertex program to interpret/execute */ tgsi_exec_machine_bind_shader(&draw->machine, - shader->state->tokens, + shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); @@ -132,20 +132,30 @@ vs_exec_run( struct draw_vertex_shader *shader, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. @@ -177,7 +187,7 @@ draw_create_vs_exec(struct draw_context *draw, if (vs == NULL) return NULL; - vs->state = state; + vs->state = *state; vs->prepare = vs_exec_prepare; vs->run = vs_exec_run; vs->delete = vs_exec_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 0fd557d667..53c260be53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -135,25 +135,35 @@ vs_llvm_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0b8bc2bf14..5ee2adb344 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,20 +158,30 @@ vs_sse_run( struct draw_vertex_shader *base, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. @@ -211,14 +221,14 @@ draw_create_vs_sse(struct draw_context *draw, if (vs == NULL) return NULL; - vs->base.state = templ; + vs->base.state = *templ; vs->base.prepare = vs_sse_prepare; vs->base.run = vs_sse_run; vs->base.delete = vs_sse_delete; x86_init_func( &vs->sse2_program ); - if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens, + if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, &vs->sse2_program )) goto fail; diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index 39fac6ea4a..c24e19e062 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -15,7 +15,7 @@ GALLIVM_SOURCES = \ storagesoa.cpp \ instructionssoa.cpp -INC_SOURCES = gallivm_builtins.cpp +INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp CPP_SOURCES = \ $(GALLIVM_SOURCES) @@ -65,8 +65,10 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins +gallivmsoabuiltins.cpp: soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins # Emacs tags tags: @@ -78,6 +80,7 @@ clean: -rm -f *.o */*.o *~ *.so *~ server/*.o -rm -f depend depend.bak -rm -f gallivm_builtins.cpp + -rm -f gallivmsoabuiltins.cpp symlinks: diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index d14bb3b99a..b6f641a3f8 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -306,11 +306,19 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) { struct gallivm_prog *prog = (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + + std::cout << "Before optimizations:"<<std::endl; + ir->module->dump(); + std::cout<<"-------------------------------"<<std::endl; + + PassManager veri; + veri.add(createVerifierPass()); + veri.run(*ir->module); llvm::Module *mod = llvm::CloneModule(ir->module); prog->num_consts = ir->num_consts; memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); prog->num_interp = ir->num_interp; - + /* Run optimization passes over it */ PassManager passes; passes.add(new TargetData(mod)); diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 57912a952f..b4d6555d2f 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -33,6 +33,16 @@ #ifndef GALLIVM_H #define GALLIVM_H +/* + LLVM representation consists of two stages - layout independent + intermediate representation gallivm_ir and driver specific + gallivm_prog. TGSI is first being translated into gallivm_ir + after that driver can set number of options on gallivm_ir and + have it compiled into gallivm_prog. gallivm_prog can be either + executed (assuming there's LLVM JIT backend for the current + target) or machine code generation can be done (assuming there's + a LLVM code generator for thecurrent target) + */ #if defined __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 55d39fa5f1..8919491792 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -42,6 +42,7 @@ #include <llvm/InstrTypes.h> #include <llvm/Instructions.h> #include <llvm/ParameterAttributes.h> +#include <llvm/ParamAttrsList.h> #include <sstream> #include <fstream> @@ -51,6 +52,15 @@ using namespace llvm; #include "gallivm_builtins.cpp" +#if 0 + +llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); +} +#endif + static inline std::string createFuncName(int label) { std::ostringstream stream; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a4d5046637..89d513afd0 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -2,9 +2,28 @@ #include "storagesoa.h" +#include "pipe/p_shader_tokens.h" + +#include <llvm/CallingConv.h> #include <llvm/Constants.h> +#include <llvm/Module.h> +#include <llvm/Function.h> +#include <llvm/Instructions.h> +#include <llvm/Transforms/Utils/Cloning.h> +#include <llvm/ParamAttrsList.h> + +#include <iostream> + +/* disable some warnings. this file is autogenerated */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif using namespace llvm; +#include "gallivmsoabuiltins.cpp" +#if defined(__GNUC__) +#pragma GCC diagnostic warning "-Wunused-variable" +#endif InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, StorageSoa *storage) @@ -12,6 +31,8 @@ InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, m_storage(storage), m_idx(0) { + createFunctionMap(); + createBuiltins(); } const char * InstructionsSoa::name(const char *prefix) const @@ -119,3 +140,167 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) return res; } + +void InstructionsSoa::createFunctionMap() +{ + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; +} + +llvm::Function * InstructionsSoa::function(int op) +{ + if (m_functions.find(op) != m_functions.end()) + return m_functions[op]; + + std::string name = m_functionsMap[op]; + + llvm::Function *originalFunc = m_builtins->getFunction(name); + llvm::Function *func = CloneFunction(originalFunc); + currentModule()->getFunctionList().push_back(func); + std::cout << "Func parent is "<<func->getParent() + <<", cur is "<<currentModule() <<std::endl; + func->dump(); + //func->setParent(currentModule()); + m_functions[op] = func; + return func; +} + +llvm::Module * InstructionsSoa::currentModule() const +{ + BasicBlock *block = m_builder.GetInsertBlock(); + if (!block || !block->getParent()) + return 0; + + return block->getParent()->getParent(); +} + +void InstructionsSoa::createBuiltins() +{ + m_builtins = createSoaBuiltins(); +} + +std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP3); + return callBuiltin(func, in1, in2); +} + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); + + std::vector<Value*> indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *getElem = new GetElementPtrInst(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + return getElem; +} + +std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) +{ + GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); + + std::vector<llvm::Value*> res(4); + res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); + res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); + res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); + res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP4); + return callBuiltin(func, in1, in2); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + params.push_back(in3[0]); + params.push_back(in3[1]); + params.push_back(in3[2]); + params.push_back(in3[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 4169dcbb2e..3ef51dcaff 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -30,6 +30,7 @@ #include <llvm/Support/LLVMBuilder.h> +#include <map> #include <vector> namespace llvm { @@ -47,9 +48,12 @@ public: llvm::BasicBlock *block, StorageSoa *storage); std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); - std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2, const std::vector<llvm::Value*> in3); @@ -62,9 +66,29 @@ private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, llvm::Value *z, llvm::Value *w); + void createFunctionMap(); + void createBuiltins(); + llvm::Function *function(int); + llvm::Module *currentModule() const; + llvm::Value *allocaTemp(); + std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); private: llvm::LLVMFoldingBuilder m_builder; StorageSoa *m_storage; + + std::map<int, std::string> m_functionsMap; + std::map<int, llvm::Function*> m_functions; + llvm::Module *m_builtins; + private: mutable int m_idx; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c new file mode 100644 index 0000000000..24c14e1b69 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * This file is compiled with clang into the LLVM bitcode + * + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; + +void dp3(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + + +void dp4(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z) + (tmp0w * tmp1w); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +#if 0 +void yo(float4 *out, float4 *in) +{ + float4 res[4]; + + dp3(res, in[0], in[1], in[2], in[3], + in[4], in[5], in[6], in[7]); + out[1] = res[1]; +} +#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index ed0674a96f..bb6fe3d7e1 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -277,7 +277,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v return constVector; } -std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, +std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx) { std::vector<llvm::Value*> val(4); @@ -292,25 +292,29 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); switch(type) { - case Input: + case TGSI_FILE_INPUT: val = inputElement(realIndex); break; - case Output: + case TGSI_FILE_OUTPUT: val = outputElement(realIndex); break; - case Temp: + case TGSI_FILE_TEMPORARY: val = tempElement(realIndex); break; - case Const: + case TGSI_FILE_CONSTANT: val = constElement(realIndex); break; - case Immediate: + case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); break; - case Address: + case TGSI_FILE_ADDRESS: debug_printf("Address not handled in the load phase!\n"); assert(0); break; + default: + debug_printf("Unknown load!\n"); + assert(0); + break; } if (!gallivm_is_swizzle(swizzle)) return val; @@ -324,21 +328,21 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, return res; } -void StorageSoa::store(Argument type, int idx, const std::vector<llvm::Value*> &val, +void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, int mask) { llvm::Value *out = 0; switch(type) { - case Output: + case TGSI_FILE_OUTPUT: out = m_output; break; - case Temp: + case TGSI_FILE_TEMPORARY: out = m_temps; break; - case Input: + case TGSI_FILE_INPUT: out = m_input; break; - case Address: { + case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; if (!addr) { addAddress(idx); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index 6443351f27..ae2fc7c6ae 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -28,6 +28,8 @@ #ifndef STORAGESOA_H #define STORAGESOA_H +#include <pipe/p_shader_tokens.h> + #include <vector> #include <list> #include <map> @@ -46,15 +48,6 @@ namespace llvm { class StorageSoa { public: - enum Argument { - Input, - Output, - Temp, - Const, - Immediate, - Address - }; -public: StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, @@ -62,9 +55,9 @@ public: llvm::Value *temps); - std::vector<llvm::Value*> load(Argument type, int idx, int swizzle, + std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx =0); - void store(Argument type, int idx, const std::vector<llvm::Value*> &val, + void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, int mask); void addImmediate(float *vec); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 2cb4acce32..3f65865a5a 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -708,25 +708,9 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->load(StorageSoa::Const, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->load(StorageSoa::Input, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->load(StorageSoa::Temp, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->load(StorageSoa::Output, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->load(StorageSoa::Immediate, - src->SrcRegister.Index, swizzle, indIdx); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } + + val = storage->load((enum tgsi_file_type)src->SrcRegister.File, + src->SrcRegister.Index, swizzle, indIdx); inputs[i] = val; } @@ -763,9 +747,11 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DST: { @@ -1067,19 +1053,8 @@ translate_instructionir(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->store(StorageSoa::Output, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->store(StorageSoa::Temp, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->store(StorageSoa::Address, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } + storage->store((enum tgsi_file_type)dst->DstRegister.File, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 95a2d6fcbb..a996218ce7 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); p->csr = p->store; + + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. + */ + p->regs[0] = ~7; + p->regs[1] = (1U << (80 - 64)) - 1; } @@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p) } +int spe_allocate_available_register(struct spe_function *p) +{ + unsigned i; + for (i = 0; i < 128; i++) { + const uint64_t mask = (1ULL << (i % 128)); + const unsigned idx = i / 128; + + if ((p->regs[idx] & mask) != 0) { + p->regs[idx] &= ~mask; + return i; + } + } + + return -1; +} + + +int spe_allocate_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) != 0); + + p->regs[idx] &= ~(1ULL << bit); + return reg; +} + + +void spe_release_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) == 0); + + p->regs[idx] |= (1ULL << bit); +} + + + + void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 10ce44b3a0..5a1eb1ed8d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -39,11 +39,27 @@ struct spe_function { uint32_t *store; uint32_t *csr; const char *fn; + + /** + * Mask of used / unused registers + * + * Each set bit corresponds to an available register. Each cleared bit + * corresponds to an allocated register. + * + * \sa + * spe_allocate_register, spe_allocate_available_register, + * spe_release_register + */ + uint64_t regs[2]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern int spe_allocate_available_register(struct spe_function *p); +extern int spe_allocate_register(struct spe_function *p, int reg); +extern void spe_release_register(struct spe_function *p, int reg); + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile new file mode 100644 index 0000000000..516d1756cf --- /dev/null +++ b/src/gallium/auxiliary/sct/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = sct + +C_SOURCES = \ + sct.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript new file mode 100644 index 0000000000..76927d973f --- /dev/null +++ b/src/gallium/auxiliary/sct/SConscript @@ -0,0 +1,9 @@ +Import('*') + +sct = env.ConvenienceLibrary( + target = 'sct', + source = [ + 'sct.c' + ]) + +auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c new file mode 100644 index 0000000000..97ee5882a1 --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.c @@ -0,0 +1,453 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "sct.h" + + +struct texture_list +{ + struct pipe_texture *texture; + struct texture_list *next; +}; + + + +#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) + +struct sct_context +{ + const struct pipe_context *context; + + /** surfaces the context is drawing into */ + struct pipe_surface *surfaces[MAX_SURFACES]; + + /** currently bound textures */ + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + + /** previously bound textures, used but not flushed */ + struct texture_list *textures_used; + + boolean needs_flush; + + struct sct_context *next; +}; + + + +struct sct_surface +{ + const struct pipe_surface *surface; + + /** list of contexts drawing to this surface */ + struct sct_context_list *contexts; + + struct sct_surface *next; +}; + + + +/** + * Find the surface_info for the given pipe_surface + */ +static struct sct_surface * +find_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) + if (si->surface == surface) + return si; + return NULL; +} + + +/** + * As above, but create new surface_info if surface is new. + */ +static struct sct_surface * +find_create_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) + return si; + + /* alloc new */ + si = CALLOC_STRUCT(sct_surface); + if (si) { + si->surface = surface; + + /* insert at head */ + si->next = sct->surfaces; + sct->surfaces = si; + } + + return si; +} + + +/** + * Find a context_info for the given context. + */ +static struct sct_context * +find_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci; + for (ci = sct->contexts; ci; ci = ci->next) + if (ci->context == context) + return ci; + return NULL; +} + + +/** + * As above, but create new context_info if context is new. + */ +static struct sct_context * +find_create_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + if (ci) + return ci; + + /* alloc new */ + ci = CALLOC_STRUCT(sct_context); + if (ci) { + ci->context = context; + + /* insert at head */ + ci->next = sct->contexts; + sct->contexts = ci; + } + + return ci; +} + + +/** + * Is the context already bound to the surface? + */ +static boolean +find_surface_context(const struct sct_surface *si, + const struct pipe_context *context) +{ + const struct sct_context_list *cl; + for (cl = si->contexts; cl; cl = cl->next) { + if (cl->context == context) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add a context to the list of contexts associated with a surface. + */ +static void +add_context_to_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); + if (cl) { + cl->context = context; + /* insert at head of list of contexts */ + cl->next = si->contexts; + si->contexts = cl; + } +} + + +/** + * Remove a context from the list of contexts associated with a surface. + */ +static void +remove_context_from_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *prev = NULL, *curr, *next; + + for (curr = si->contexts; curr; curr = next) { + if (curr->context == context) { + /* remove */ + if (prev) + prev->next = curr->next; + else + si->contexts = curr->next; + next = curr->next; + FREE(curr); + } + else { + prev = curr; + } + } +} + + +/** + * Unbind context from surface. + */ +static void +unbind_context_surface(struct surface_context_tracker *sct, + struct pipe_context *context, + struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) { + remove_context_from_surface(si, context); + } +} + + +/** + * Bind context to a set of surfaces (color + Z). + * Like MakeCurrent(). + */ +void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces) +{ + struct sct_context *ci = find_create_context_info(sct, context); + uint i; + + if (!ci) { + return; /* out of memory */ + } + + /* unbind currently bound surfaces */ + for (i = 0; i < MAX_SURFACES; i++) { + if (ci->surfaces[i]) { + unbind_context_surface(sct, context, ci->surfaces[i]); + } + } + + /* bind new surfaces */ + for (i = 0; i < num_surf; i++) { + struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); + if (!find_surface_context(si, context)) { + add_context_to_surface(si, context); + } + } +} + + +/** + * Return list of contexts bound to a surface. + */ +const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + const struct sct_surface *si = find_surface_info(sct, surface); + return si->contexts; +} + + + +static boolean +find_texture(const struct sct_context *ci, + const struct pipe_texture *texture) +{ + const struct texture_list *tl; + + for (tl = ci->textures_used; tl; tl = tl->next) { + if (tl->texture == texture) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add the given texture to the context's list of used textures. + */ +static void +add_texture_used(struct sct_context *ci, + struct pipe_texture *texture) +{ + if (!find_texture(ci, texture)) { + /* add to list */ + struct texture_list *tl = CALLOC_STRUCT(texture_list); + if (tl) { + pipe_texture_reference(&tl->texture, texture); + /* insert at head */ + tl->next = ci->textures_used; + ci->textures_used = tl; + } + } +} + + +/** + * Bind a texture to a rendering context. + */ +void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *tex) +{ + struct sct_context *ci = find_context_info(sct, context); + + if (ci->textures[unit] != tex) { + /* put texture on the 'used' list */ + add_texture_used(ci, tex); + /* bind new */ + pipe_texture_reference(&ci->textures[unit], tex); + } +} + + +/** + * Check if the given texture has been used by the rendering context + * since the last call to sct_flush_textures(). + */ +boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture) +{ + const struct sct_context *ci = find_context_info(sct, context); + return find_texture(ci, texture); +} + + +/** + * To be called when the image contents of a texture are changed, such + * as for gl[Copy]TexSubImage(). + * XXX this may not be needed + */ +void +sct_update_texture(struct pipe_texture *tex) +{ + +} + + +/** + * When a scene is flushed/rendered we can release the list of + * used textures. + */ +void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + struct texture_list *tl, *next; + uint i; + + for (tl = ci->textures_used; tl; tl = next) { + next = tl->next; + pipe_texture_release(&tl->texture); + FREE(tl); + } + ci->textures_used = NULL; + + /* put the currently bound textures on the 'used' list */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + add_texture_used(ci, ci->textures[i]); + } +} + + + +void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + /* XXX should we require an unbinding first? */ + { + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) { + remove_context_from_surface(si, context); + } + } + + /* remove context from context_info list */ + { + struct sct_context *ci, *next, *prev = NULL; + for (ci = sct->contexts; ci; ci = next) { + next = ci->next; + if (ci->context == context) { + if (prev) + prev->next = ci->next; + else + sct->contexts = ci->next; + FREE(ci); + } + else { + prev = ci; + } + } + } + +} + + +void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface) +{ + if (1) { + /* debug/sanity: no context should be bound to surface */ + struct sct_context *ci; + uint i; + for (ci = sct->contexts; ci; ci = ci->next) { + for (i = 0; i < MAX_SURFACES; i++) { + assert(ci->surfaces[i] != surface); + } + } + } + + /* remove surface from sct_surface list */ + { + struct sct_surface *si, *next, *prev = NULL; + for (si = sct->surfaces; si; si = next) { + next = si->next; + if (si->surface == surface) { + /* unlink */ + if (prev) + prev->next = si->next; + else + sct->surfaces = si->next; + FREE(si); + } + else { + prev = si; + } + } + } +} diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h new file mode 100644 index 0000000000..cf7c4d3bdf --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Surface/Context Tracking + * + * For some drivers, we need to monitor the binding between contexts and + * surfaces/textures. + * This code may evolve quite a bit... + */ + + +#ifndef SCT_H +#define SCT_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_context; +struct pipe_surface; + +struct sct_context; +struct sct_surface; + + +/** + * Per-device info, basically + */ +struct surface_context_tracker +{ + struct sct_context *contexts; + struct sct_surface *surfaces; +}; + + + +/** + * Simple linked list of contexts + */ +struct sct_context_list +{ + const struct pipe_context *context; + struct sct_context_list *next; +}; + + + +extern void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces); + + +extern void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *texture); + + +extern void +sct_update_texture(struct pipe_texture *tex); + + +extern boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture); + +extern void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surf); + + +extern void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface); + + + +#ifdef __cplusplus +} +#endif + +#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c new file mode 100644 index 0000000000..6227f19962 --- /dev/null +++ b/src/gallium/auxiliary/sct/usage.c @@ -0,0 +1,61 @@ +/* surface / context tracking */ + + +/* + +context A: + render to texture T + +context B: + texture from T + +----------------------- + +flush surface: + which contexts are bound to the surface? + +----------------------- + +glTexSubImage(): + which contexts need to be flushed? + + */ + + +/* + +in MakeCurrent(): + + call sct_bind_surfaces(context, list of surfaces) to update the + dependencies between context and surfaces + + +in SurfaceFlush(), or whatever it is in D3D: + + call sct_get_surface_contexts(surface) to get a list of contexts + which are currently bound to the surface. + + + +in BindTexture(): + + call sct_bind_texture(context, texture) to indicate that the texture + is used in the scene. + + +in glTexSubImage() or RenderToTexture(): + + call sct_is_texture_used(context, texture) to determine if the texture + has been used in the scene, but the scene's not flushed. If TRUE is + returned it means the scene has to be rendered/flushed before the contents + of the texture can be changed. + + +in psb_scene_flush/terminate(): + + call sct_flush_textures(context) to tell the SCT that the textures which + were used in the scene can be released. + + + +*/ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index ac52441400..f2ed9e0353 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler, static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, + boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union tgsi_exec_channel r[8]; @@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); micro_div( &r[0], &r[0], &r[1] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -2007,14 +1984,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -2030,7 +2007,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); break; case TGSI_OPCODE_UP2H: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index a00ff1c2a5..9c883ab704 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -719,7 +719,6 @@ tgsi_build_full_instruction( reg->SrcRegisterExtSwz.NegateY, reg->SrcRegisterExtSwz.NegateZ, reg->SrcRegisterExtSwz.NegateW, - reg->SrcRegisterExtSwz.ExtDivide, prev_token, instruction, header ); @@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void ) src_register_ext_swz.NegateY = 0; src_register_ext_swz.NegateZ = 0; src_register_ext_swz.NegateW = 0; - src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE; src_register_ext_swz.Padding = 0; src_register_ext_swz.Extended = 0; @@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz( assert( negate_y <= 1 ); assert( negate_z <= 1 ); assert( negate_w <= 1 ); - assert( ext_divide <= TGSI_EXTSWIZZLE_ONE ); src_register_ext_swz = tgsi_default_src_register_ext_swz(); src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; @@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz( src_register_ext_swz.NegateY = negate_y; src_register_ext_swz.NegateZ = negate_z; src_register_ext_swz.NegateW = negate_w; - src_register_ext_swz.ExtDivide = ext_divide; prev_token->Extended = 1; instruction_grow( instruction, header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 607860e7fc..80bffc4ae7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 59be14a748..ceb407b884 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -459,7 +459,8 @@ static const char *TGSI_OPCODES[] = "OPCODE_IFC", "OPCODE_BREAKC", "OPCODE_KIL", - "OPCODE_END" + "OPCODE_END", + "OPCODE_TXP" }; static const char *TGSI_OPCODES_SHORT[] = @@ -597,7 +598,8 @@ static const char *TGSI_OPCODES_SHORT[] = "IFC", "BREAKC", "KIL", - "END" + "END", + "TXP" }; static const char *TGSI_SATS[] = @@ -1361,10 +1363,6 @@ dump_instruction_verbose( TXT( "\nNegateW : " ); UID( src->SrcRegisterExtSwz.NegateW ); } - if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) { - TXT( "\nExtDivide : " ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES ); - } if( ignored ) { TXT( "\nPadding : " ); UIX( src->SrcRegisterExtSwz.Padding ); diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 906a46d6b4..2abbe9500e 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,7 +7,9 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ - u_mm.c + u_handle_table.c \ + u_mm.c \ + u_snprintf.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 4717941434..2030214aa7 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,7 +6,9 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_handle_table.c', 'u_mm.c', + 'u_snprintf.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b9607a6ba7..09cabdae25 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -36,14 +36,37 @@ #include <stdlib.h> #endif -#include "pipe/p_debug.h" #include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + + +#ifdef WIN32 +static INLINE void +rpl_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} + +int rpl_vsnprintf(char *, size_t, const char *, va_list); +#endif void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 - EngDebugPrint("Gallium3D: ", (PCHAR)format, ap); +#ifndef WINCE + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation */ + char buf[512 + 1]; + rpl_vsnprintf(buf, sizeof(buf), format, ap); + rpl_EngDebugPrint("%s", buf); +#else + /* TODO: Implement debug print for WINCE */ +#endif #else vfprintf(stderr, format, ap); #endif @@ -59,18 +82,92 @@ void debug_printf(const char *format, ...) } -static INLINE void debug_abort(void) +/* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */ + + +static INLINE void debug_break(void) { -#ifdef WIN32 +#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) + __asm("int3"); +#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) + _asm {int 3}; +#elif defined(WIN32) && !defined(WINCE) EngDebugBreak(); #else abort(); #endif } +#if defined(WIN32) +ULONG_PTR debug_config_file = 0; +void *mapped_config_file = 0; + +enum { + eAssertAbortEn = 0x1, +}; + +/* Check for aborts enabled. */ +static unsigned abort_en() +{ + if (!mapped_config_file) + { + /* Open an 8 byte file for configuration data. */ + mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); + } + /* An value of "0" (ascii) in the configuration file will clear the first 8 bits in the test byte. */ + /* An value of "1" (ascii) in the configuration file will set the first bit in the test byte. */ + /* An value of "2" (ascii) in the configuration file will set the second bit in the test byte. */ + return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; +} +#else /* WIN32 */ +static unsigned abort_en() +{ + return !GETENV("GALLIUM_ABORT_ON_ASSERT"); +} +#endif void debug_assert_fail(const char *expr, const char *file, unsigned line) { debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); - debug_abort(); + if (abort_en()) + { + debug_break(); + } else + { + debug_printf("continuing...\n"); + } +} + + +#define DEBUG_MASK_TABLE_SIZE 256 + + +/** + * Mask hash table. + * + * For now we just take the lower bits of the key, and do no attempt to solve + * collisions. Use a proper hash table when we have dozens of drivers. + */ +static uint32_t debug_mask_table[DEBUG_MASK_TABLE_SIZE]; + + +void debug_mask_set(uint32_t uuid, uint32_t mask) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + debug_mask_table[hash] = mask; +} + + +uint32_t debug_mask_get(uint32_t uuid) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + return debug_mask_table[hash]; +} + + +void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_list ap) +{ + uint32_t mask = debug_mask_get(uuid); + if(mask & what) + debug_vprintf(format, ap); } diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index 318b6d11a6..fdc80a13b3 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -52,44 +52,50 @@ pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) return FALSE; } +#ifdef __cplusplus +extern "C" { +#endif -extern void +void pipe_get_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *p, int dst_stride); -extern void +void pipe_put_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *p, int src_stride); -extern void +void pipe_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p); -extern void +void pipe_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p); -extern void +void pipe_get_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z); -extern void +void pipe_put_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c new file mode 100644 index 0000000000..d9f2f8fc28 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -0,0 +1,506 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Based on the work of Eric Anholt <anholt@FreeBSD.org> + */ + +/* FIXME: clean this entire file up */ + +#include "u_cpu_detect.h" + +#ifdef __linux__ +#define OS_LINUX +#endif +#ifdef WIN32 +#define OS_WIN32 +#endif + +#if defined(ARCH_POWERPC) +#if defined(OS_DARWIN) +#include <sys/sysctl.h> +#else +#include <signal.h> +#include <setjmp.h> +#endif +#endif + +#if defined(OS_NETBSD) || defined(OS_OPENBSD) +#include <sys/param.h> +#include <sys/sysctl.h> +#include <machine/cpu.h> +#endif + +#if defined(OS_FREEBSD) +#include <sys/types.h> +#include <sys/sysctl.h> +#endif + +#if defined(OS_LINUX) +#include <signal.h> +#endif + +#if defined(OS_WIN32) +#include <windows.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + + +static struct cpu_detect_caps __cpu_detect_caps; +static int __cpu_detect_initialized = 0; + +static int has_cpuid(void); +static int cpuid(unsigned int ax, unsigned int *p); + +/* The sigill handlers */ +#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */ +#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) +static void sigill_handler_sse(int signal, struct sigcontext sc) +{ + /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" + * instructions are 3 bytes long. We must increment the instruction + * pointer manually to avoid repeated execution of the offending + * instruction. + * + * If the SIGILL is caused by a divide-by-zero when unmasked + * exceptions aren't supported, the SIMD FPU status and control + * word will be restored at the end of the test, so we don't need + * to worry about doing it here. Besides, we may not be able to... + */ + sc.eip += 3; + + __cpu_detect_caps.hasSSE=0; +} + +static void sigfpe_handler_sse(int signal, struct sigcontext sc) +{ + if (sc.fpstate->magic != 0xffff) { + /* Our signal context has the extended FPU state, so reset the + * divide-by-zero exception mask and clear the divide-by-zero + * exception bit. + */ + sc.fpstate->mxcsr |= 0x00000200; + sc.fpstate->mxcsr &= 0xfffffffb; + } else { + /* If we ever get here, we're completely hosed. + */ + } +} +#endif +#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ + +#if defined(OS_WIN32) +LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep) +{ + if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ + ep->ContextRecord->Eip +=3; + __cpu_detect_caps.hasSSE=0; + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; +} +#endif /* OS_WIN32 */ + + +#if defined(ARCH_POWERPC) && !defined(OS_DARWIN) +static sigjmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; + +static void sigill_handler (int sig); + +static void sigill_handler (int sig) +{ + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + __lv_powerpc_canjump = 0; + siglongjmp(__lv_powerpc_jmpbuf, 1); +} + +static void check_os_altivec_support(void) +{ +#if defined(OS_DARWIN) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + __cpu_detect_caps.hasAltiVec = 1; + } + } +#else /* !OS_DARWIN */ + /* no Darwin, do it the brute-force way */ + /* this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + __cpu_detect_caps.hasAltiVec = 1; + } +#endif +} +#endif + +/* If we're running on a processor that can do SSE, let's see if we + * are allowed to or not. This will catch 2.4.0 or later kernels that + * haven't been configured for a Pentium III but are running on one, + * and RedHat patched 2.2 kernels that have broken exception handling + * support for user space apps that do SSE. + */ +static void check_os_katmai_support(void) +{ +#if defined(ARCH_X86) +#if defined(OS_FREEBSD) + int has_sse=0, ret; + int len = sizeof (has_sse); + + ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); + if (ret || !has_sse) + __cpu_detect_caps.hasSSE=0; + +#elif defined(OS_NETBSD) || defined(OS_OPENBSD) + int has_sse, has_sse2, ret, mib[2]; + int varlen; + + mib[0] = CTL_MACHDEP; + mib[1] = CPU_SSE; + varlen = sizeof (has_sse); + + ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); + if (ret < 0 || !has_sse) { + __cpu_detect_caps.hasSSE = 0; + } else { + __cpu_detect_caps.hasSSE = 1; + } + + mib[1] = CPU_SSE2; + varlen = sizeof (has_sse2); + ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); + if (ret < 0 || !has_sse2) { + __cpu_detect_caps.hasSSE2 = 0; + } else { + __cpu_detect_caps.hasSSE2 = 1; + } + __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */ + +#elif defined(OS_WIN32) + LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; + if (__cpu_detect_caps.hasSSE) { + exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); + __asm __volatile ("xorps %xmm0, %xmm0"); + SetUnhandledExceptionFilter(exc_fil); + } +#elif defined(OS_LINUX) + struct sigaction saved_sigill; + struct sigaction saved_sigfpe; + + /* Save the original signal handlers. + */ + sigaction(SIGILL, NULL, &saved_sigill); + sigaction(SIGFPE, NULL, &saved_sigfpe); + + signal(SIGILL, (void (*)(int))sigill_handler_sse); + signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); + + /* Emulate test for OSFXSR in CR4. The OS will set this bit if it + * supports the extended FPU save and restore required for SSE. If + * we execute an SSE instruction on a PIII and get a SIGILL, the OS + * doesn't support Streaming SIMD Exceptions, even if the processor + * does. + */ + if (__cpu_detect_caps.hasSSE) { + __asm __volatile ("xorps %xmm1, %xmm0"); + } + + /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if + * it supports unmasked SIMD FPU exceptions. If we unmask the + * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS + * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE + * as expected, we're okay but we need to clean up after it. + * + * Are we being too stringent in our requirement that the OS support + * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by + * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 + * doesn't even support them. We at least know the user-space SSE + * support is good in kernels that do support unmasked exceptions, + * and therefore to be safe I'm going to leave this test in here. + */ + if (__cpu_detect_caps.hasSSE) { + // test_os_katmai_exception_support(); + } + + /* Restore the original signal handlers. + */ + sigaction(SIGILL, &saved_sigill, NULL); + sigaction(SIGFPE, &saved_sigfpe, NULL); + +#else + /* We can't use POSIX signal handling to test the availability of + * SSE, so we disable it by default. + */ + __cpu_detect_caps.hasSSE = 0; +#endif /* __linux__ */ +#endif +} + + +static int has_cpuid(void) +{ +#if defined(ARCH_X86) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + return 0; +#endif +} + +static int cpuid(unsigned int ax, unsigned int *p) +{ +#if defined(ARCH_X86) + unsigned int flags; + + __asm __volatile + ("movl %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%esi" + : "=a" (p[0]), "=S" (p[1]), + "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); + + return 0; +#else + return -1; +#endif +} + +void cpu_detect_initialize() +{ + unsigned int regs[4]; + unsigned int regs2[4]; + + int mib[2], ncpu; + int len; + + memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps)); + + /* Check for arch type */ +#if defined(ARCH_MIPS) + __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS; +#elif defined(ARCH_ALPHA) + __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA; +#elif defined(ARCH_SPARC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC; +#elif defined(ARCH_X86) + __cpu_detect_caps.type = CPU_DETECT_TYPE_X86; +#elif defined(ARCH_POWERPC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC; +#else + __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER; +#endif + + /* Count the number of CPUs in system */ +#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN) + __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (__cpu_detect_caps.nrcpu == -1) + __cpu_detect_caps.nrcpu = 1; + +#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD) + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + __cpu_detect_caps.nrcpu = ncpu; + +#else + __cpu_detect_caps.nrcpu = 1; +#endif + +#if defined(ARCH_X86) + /* No cpuid, old 486 or lower */ + if (has_cpuid() == 0) + return; + + __cpu_detect_caps.cacheline = 32; + + /* Get max cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf; + if (__cpu_detect_caps.x86cpuType == 0xf) + __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ + __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ + __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ + __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */ + __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ + __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */ + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + __cpu_detect_caps.cacheline = cacheline; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ + __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ + __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30; + } + + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + __cpu_detect_caps.cacheline = regs2[2] & 0xFF; + } + + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD) + if (__cpu_detect_caps.hasSSE) + check_os_katmai_support(); + + if (!__cpu_detect_caps.hasSSE) { + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; + } +#else + __cpu_detect_caps.hasSSE = 0; + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; +#endif +#endif /* ARCH_X86 */ + +#if defined(ARCH_POWERPC) + check_os_altivec_support(); +#endif /* ARCH_POWERPC */ + + __cpu_detect_initialized = 1; +} + +struct cpu_detect_caps *cpu_detect_get_caps() +{ + return &__cpu_detect_caps; +} + +/* The getters and setters for feature flags */ +int cpu_detect_get_tsc() +{ + return __cpu_detect_caps.hasTSC; +} + +int cpu_detect_get_mmx() +{ + return __cpu_detect_caps.hasMMX; +} + +int cpu_detect_get_mmx2() +{ + return __cpu_detect_caps.hasMMX2; +} + +int cpu_detect_get_sse() +{ + return __cpu_detect_caps.hasSSE; +} + +int cpu_detect_get_sse2() +{ + return __cpu_detect_caps.hasSSE2; +} + +int cpu_detect_get_sse3() +{ + return __cpu_detect_caps.hasSSE3; +} + +int cpu_detect_get_ssse3() +{ + return __cpu_detect_caps.hasSSSE3; +} + +int cpu_detect_get_3dnow() +{ + return __cpu_detect_caps.has3DNow; +} + +int cpu_detect_get_3dnow2() +{ + return __cpu_detect_caps.has3DNowExt; +} + +int cpu_detect_get_altivec() +{ + return __cpu_detect_caps.hasAltiVec; +} + diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h new file mode 100644 index 0000000000..1612d49286 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + ***************************************************************************/ + +/* + * Based on the work of Eric Anholt <anholt@FreeBSD.org> + */ + +#ifndef _CPU_DETECT_H +#define _CPU_DETECT_H + +typedef enum { + CPU_DETECT_TYPE_MIPS, + CPU_DETECT_TYPE_ALPHA, + CPU_DETECT_TYPE_SPARC, + CPU_DETECT_TYPE_X86, + CPU_DETECT_TYPE_POWERPC, + CPU_DETECT_TYPE_OTHER +} cpu_detect_type; + +struct cpu_detect_caps { + cpu_detect_type type; + int nrcpu; + + /* Feature flags */ + int x86cpuType; + int cacheline; + + int hasTSC; + int hasMMX; + int hasMMX2; + int hasSSE; + int hasSSE2; + int hasSSE3; + int hasSSSE3; + int has3DNow; + int has3DNowExt; + int hasAltiVec; +}; + +/* prototypes */ +void cpu_detect_initialize(void); +struct cpu_detect_caps *cpu_detect_get_caps(void); + +int cpu_detect_get_tsc(void); +int cpu_detect_get_mmx(void); +int cpu_detect_get_mmx2(void); +int cpu_detect_get_sse(void); +int cpu_detect_get_sse2(void); +int cpu_detect_get_sse3(void); +int cpu_detect_get_ssse3(void); +int cpu_detect_get_3dnow(void); +int cpu_detect_get_3dnow2(void); +int cpu_detect_get_altivec(void); + +#endif /* _CPU_DETECT_H */ diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c new file mode 100644 index 0000000000..8a298f7c41 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table implementation. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "u_handle_table.h" + + +#define HANDLE_TABLE_INITIAL_SIZE 16 + + +struct handle_table +{ + /** Object array. Empty handles have a null object */ + void **objects; + + /** Number of objects the handle can currently hold */ + unsigned size; + /** Number of consecutive objects allocated at the start of the table */ + unsigned filled; + + /** Optional object destructor */ + void (*destroy)(void *object); +}; + + +struct handle_table * +handle_table_create(void) +{ + struct handle_table *ht; + + ht = MALLOC_STRUCT(handle_table); + if(!ht) + return NULL; + + ht->objects = (void **)CALLOC(HANDLE_TABLE_INITIAL_SIZE, sizeof(void *)); + if(!ht->objects) { + FREE(ht); + return NULL; + } + + ht->size = HANDLE_TABLE_INITIAL_SIZE; + ht->filled = 0; + + ht->destroy = NULL; + + return ht; +} + + +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)) +{ + assert(ht); + ht->destroy = destroy; +} + + +unsigned +handle_table_add(struct handle_table *ht, + void *object) +{ + unsigned index; + unsigned handle; + + assert(ht); + assert(object); + if(!object) + return 0; + + /* linear search for an empty handle */ + while(ht->filled < ht->size) { + if(!ht->objects[ht->filled]) + break; + ++ht->filled; + } + + /* grow the table */ + if(ht->filled == ht->size) { + unsigned new_size; + void **new_objects; + + new_size = ht->size*2; + assert(new_size); + + new_objects = (void **)REALLOC((void *)ht->objects, + ht->size*sizeof(void *), + new_size*sizeof(void *)); + if(!new_objects) + return 0; + + memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); + + ht->size = new_size; + ht->objects = new_objects; + } + + index = ht->filled; + + handle = index + 1; + + /* check integer overflow */ + if(!handle) + return 0; + + assert(!ht->objects[index]); + ht->objects[index] = object; + ++ht->filled; + + return handle; +} + + +void * +handle_table_get(struct handle_table *ht, + unsigned handle) +{ + void *object; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return NULL; + + object = ht->objects[handle - 1]; + assert(object); + + return object; +} + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle) +{ + void *object; + unsigned index; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return; + + index = handle - 1; + object = ht->objects[index]; + assert(object); + + if(object && ht->destroy) + ht->destroy(object); + + ht->objects[index] = NULL; + if(index < ht->filled) + ht->filled = index; +} + + +void +handle_table_destroy(struct handle_table *ht) +{ + unsigned index; + assert(ht); + + if(ht->destroy) + for(index = 0; index < ht->size; ++index) + if(ht->objects[index]) + ht->destroy(ht->objects[index]); + + FREE(ht->objects); + FREE(ht); +} + diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h new file mode 100644 index 0000000000..51fc273865 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_HANDLE_TABLE_H_ +#define U_HANDLE_TABLE_H_ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Abstract data type to map integer handles to objects. + */ +struct handle_table; + + +struct handle_table * +handle_table_create(void); + + +/** + * Set an optional destructor callback. + * + * If set, it will be called during handle_table_remove and + * handle_table_destroy calls. + */ +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)); + + +/** + * Add a new object. + * + * Returns a zero handle on failure (out of memory). + */ +unsigned +handle_table_add(struct handle_table *ht, + void *object); + +/** + * Fetch an existing object. + * + * Returns NULL for an invalid handle. + */ +void * +handle_table_get(struct handle_table *ht, + unsigned handle); + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle); + + +void +handle_table_destroy(struct handle_table *ht); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HANDLE_TABLE_H_ */ diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c new file mode 100644 index 0000000000..61c20b48f7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -0,0 +1,1478 @@ +/* + * Copyright (c) 1995 Patrick Powell. + * + * This code is based on code written by Patrick Powell <papowell@astart.com>. + * It may be used for any purpose as long as this notice remains intact on all + * source code distributions. + */ + +/* + * Copyright (c) 2008 Holger Weiss. + * + * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>. + * My changes to the code may freely be used, modified and/or redistributed for + * any purpose. It would be nice if additions and fixes to this file (including + * trivial code cleanups) would be sent back in order to let me include them in + * the version available at <http://www.jhweiss.de/software/snprintf.html>. + * However, this is not a requirement for using or redistributing (possibly + * modified) versions of this file, nor is leaving this notice intact mandatory. + */ + +/* + * History + * + * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1: + * + * Fixed the detection of infinite floating point values on IRIX (and + * possibly other systems) and applied another few minor cleanups. + * + * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0: + * + * Added a lot of new features, fixed many bugs, and incorporated various + * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery + * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller + * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH + * projects. The additions include: support the "e", "E", "g", "G", and + * "F" conversion specifiers (and use conversion style "f" or "F" for the + * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j", + * "t", and "z" length modifiers; support the "#" flag and the (non-C99) + * "'" flag; use localeconv(3) (if available) to get both the current + * locale's decimal point character and the separator between groups of + * digits; fix the handling of various corner cases of field width and + * precision specifications; fix various floating point conversion bugs; + * handle infinite and NaN floating point values; don't attempt to write to + * the output buffer (which may be NULL) if a size of zero was specified; + * check for integer overflow of the field width, precision, and return + * values and during the floating point conversion; use the OUTCHAR() macro + * instead of a function for better performance; provide asprintf(3) and + * vasprintf(3) functions; add new test cases. The replacement functions + * have been renamed to use an "rpl_" prefix, the function calls in the + * main project (and in this file) must be redefined accordingly for each + * replacement function which is needed (by using Autoconf or other means). + * Various other minor improvements have been applied and the coding style + * was cleaned up for consistency. + * + * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13: + * + * C99 compliant snprintf(3) and vsnprintf(3) functions return the number + * of characters that would have been written to a sufficiently sized + * buffer (excluding the '\0'). The original code simply returned the + * length of the resulting output string, so that's been fixed. + * + * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8: + * + * The original code assumed that both snprintf(3) and vsnprintf(3) were + * missing. Some systems only have snprintf(3) but not vsnprintf(3), so + * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF. + * + * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i: + * + * The PGP code was using unsigned hexadecimal formats. Unfortunately, + * unsigned formats simply didn't work. + * + * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1: + * + * Ok, added some minimal floating point support, which means this probably + * requires libm on most operating systems. Don't yet support the exponent + * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just + * wasn't being exercised in ways which showed it, so that's been fixed. + * Also, formatted the code to Mutt conventions, and removed dead code left + * over from the original. Also, there is now a builtin-test, run with: + * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf + * + * 2996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43: + * + * This was ugly. It is still ugly. I opted out of floating point + * numbers, but the formatter understands just about everything from the + * normal C string format, at least as far as I can tell from the Solaris + * 2.5 printf(3S) man page. + */ + +/* + * ToDo + * + * - Add wide character support. + * - Add support for "%a" and "%A" conversions. + * - Create test routines which predefine the expected results. Our test cases + * usually expose bugs in system implementations rather than in ours :-) + */ + +/* + * Usage + * + * 1) The following preprocessor macros should be defined to 1 if the feature or + * file in question is available on the target system (by using Autoconf or + * other means), though basic functionality should be available as long as + * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly: + * + * HAVE_VSNPRINTF + * HAVE_SNPRINTF + * HAVE_VASPRINTF + * HAVE_ASPRINTF + * HAVE_STDARG_H + * HAVE_STDDEF_H + * HAVE_STDINT_H + * HAVE_STDLIB_H + * HAVE_INTTYPES_H + * HAVE_LOCALE_H + * HAVE_LOCALECONV + * HAVE_LCONV_DECIMAL_POINT + * HAVE_LCONV_THOUSANDS_SEP + * HAVE_LONG_DOUBLE + * HAVE_LONG_LONG_INT + * HAVE_UNSIGNED_LONG_LONG_INT + * HAVE_INTMAX_T + * HAVE_UINTMAX_T + * HAVE_UINTPTR_T + * HAVE_PTRDIFF_T + * HAVE_VA_COPY + * HAVE___VA_COPY + * + * 2) The calls to the functions which should be replaced must be redefined + * throughout the project files (by using Autoconf or other means): + * + * #define vsnprintf rpl_vsnprintf + * #define snprintf rpl_snprintf + * #define vasprintf rpl_vasprintf + * #define asprintf rpl_asprintf + * + * 3) The required replacement functions should be declared in some header file + * included throughout the project files: + * + * #if HAVE_CONFIG_H + * #include <config.h> + * #endif + * #if HAVE_STDARG_H + * #include <stdarg.h> + * #if !HAVE_VSNPRINTF + * int rpl_vsnprintf(char *, size_t, const char *, va_list); + * #endif + * #if !HAVE_SNPRINTF + * int rpl_snprintf(char *, size_t, const char *, ...); + * #endif + * #if !HAVE_VASPRINTF + * int rpl_vasprintf(char **, const char *, va_list); + * #endif + * #if !HAVE_ASPRINTF + * int rpl_asprintf(char **, const char *, ...); + * #endif + * #endif + * + * Autoconf macros for handling step 1 and step 2 are available at + * <http://www.jhweiss.de/software/snprintf.html>. + */ + +#if HAVE_CONFIG_H +#include <config.h> +#else +#ifdef WIN32 +#define vsnprintf rpl_vsnprintf +#define snprintf rpl_snprintf +#define HAVE_VSNPRINTF 0 +#define HAVE_SNPRINTF 0 +#define HAVE_VASPRINTF 1 /* not needed */ +#define HAVE_ASPRINTF 1 /* not needed */ +#define HAVE_STDARG_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 0 +#define HAVE_STDLIB_H 1 +#define HAVE_INTTYPES_H 0 +#define HAVE_LOCALE_H 0 +#define HAVE_LOCALECONV 0 +#define HAVE_LCONV_DECIMAL_POINT 0 +#define HAVE_LCONV_THOUSANDS_SEP 0 +#define HAVE_LONG_DOUBLE 0 +#define HAVE_LONG_LONG_INT 1 +#define HAVE_UNSIGNED_LONG_LONG_INT 1 +#define HAVE_INTMAX_T 0 +#define HAVE_UINTMAX_T 0 +#define HAVE_UINTPTR_T 1 +#define HAVE_PTRDIFF_T 1 +#define HAVE_VA_COPY 0 +#define HAVE___VA_COPY 0 +#else +#define HAVE_VSNPRINTF 1 +#define HAVE_SNPRINTF 1 +#define HAVE_VASPRINTF 1 +#define HAVE_ASPRINTF 1 +#endif +#endif /* HAVE_CONFIG_H */ + +#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF +#include <stdio.h> /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */ +#ifdef VA_START +#undef VA_START +#endif /* defined(VA_START) */ +#ifdef VA_SHIFT +#undef VA_SHIFT +#endif /* defined(VA_SHIFT) */ +#if HAVE_STDARG_H +#include <stdarg.h> +#define VA_START(ap, last) va_start(ap, last) +#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */ +#else /* Assume <varargs.h> is available. */ +#include <varargs.h> +#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */ +#define VA_SHIFT(ap, value, type) value = va_arg(ap, type) +#endif /* HAVE_STDARG_H */ + +#if !HAVE_VASPRINTF +#if HAVE_STDLIB_H +#include <stdlib.h> /* For malloc(3). */ +#endif /* HAVE_STDLIB_H */ +#ifdef VA_COPY +#undef VA_COPY +#endif /* defined(VA_COPY) */ +#ifdef VA_END_COPY +#undef VA_END_COPY +#endif /* defined(VA_END_COPY) */ +#if HAVE_VA_COPY +#define VA_COPY(dest, src) va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#elif HAVE___VA_COPY +#define VA_COPY(dest, src) __va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#else +#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list)) +#define VA_END_COPY(ap) /* No-op. */ +#define NEED_MYMEMCPY 1 +static void *mymemcpy(void *, void *, size_t); +#endif /* HAVE_VA_COPY */ +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_VSNPRINTF +#include <limits.h> /* For *_MAX. */ +#if HAVE_INTTYPES_H +#include <inttypes.h> /* For intmax_t (if not defined in <stdint.h>). */ +#endif /* HAVE_INTTYPES_H */ +#if HAVE_LOCALE_H +#include <locale.h> /* For localeconv(3). */ +#endif /* HAVE_LOCALE_H */ +#if HAVE_STDDEF_H +#include <stddef.h> /* For ptrdiff_t. */ +#endif /* HAVE_STDDEF_H */ +#if HAVE_STDINT_H +#include <stdint.h> /* For intmax_t. */ +#endif /* HAVE_STDINT_H */ + +/* Support for unsigned long long int. We may also need ULLONG_MAX. */ +#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */ +#ifdef UINT_MAX +#define ULONG_MAX UINT_MAX +#else +#define ULONG_MAX INT_MAX +#endif /* defined(UINT_MAX) */ +#endif /* !defined(ULONG_MAX) */ +#ifdef ULLONG +#undef ULLONG +#endif /* defined(ULLONG) */ +#if HAVE_UNSIGNED_LONG_LONG_INT +#define ULLONG unsigned long long int +#ifndef ULLONG_MAX +#define ULLONG_MAX ULONG_MAX +#endif /* !defined(ULLONG_MAX) */ +#else +#define ULLONG unsigned long int +#ifdef ULLONG_MAX +#undef ULLONG_MAX +#endif /* defined(ULLONG_MAX) */ +#define ULLONG_MAX ULONG_MAX +#endif /* HAVE_LONG_LONG_INT */ + +/* Support for uintmax_t. We also need UINTMAX_MAX. */ +#ifdef UINTMAX_T +#undef UINTMAX_T +#endif /* defined(UINTMAX_T) */ +#if HAVE_UINTMAX_T || defined(uintmax_t) +#define UINTMAX_T uintmax_t +#ifndef UINTMAX_MAX +#define UINTMAX_MAX ULLONG_MAX +#endif /* !defined(UINTMAX_MAX) */ +#else +#define UINTMAX_T ULLONG +#ifdef UINTMAX_MAX +#undef UINTMAX_MAX +#endif /* defined(UINTMAX_MAX) */ +#define UINTMAX_MAX ULLONG_MAX +#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */ + +/* Support for long double. */ +#ifndef LDOUBLE +#if HAVE_LONG_DOUBLE +#define LDOUBLE long double +#else +#define LDOUBLE double +#endif /* HAVE_LONG_DOUBLE */ +#endif /* !defined(LDOUBLE) */ + +/* Support for long long int. */ +#ifndef LLONG +#if HAVE_LONG_LONG_INT +#define LLONG long long int +#else +#define LLONG long int +#endif /* HAVE_LONG_LONG_INT */ +#endif /* !defined(LLONG) */ + +/* Support for intmax_t. */ +#ifndef INTMAX_T +#if HAVE_INTMAX_T || defined(intmax_t) +#define INTMAX_T intmax_t +#else +#define INTMAX_T LLONG +#endif /* HAVE_INTMAX_T || defined(intmax_t) */ +#endif /* !defined(INTMAX_T) */ + +/* Support for uintptr_t. */ +#ifndef UINTPTR_T +#if HAVE_UINTPTR_T || defined(uintptr_t) +#define UINTPTR_T uintptr_t +#else +#define UINTPTR_T unsigned long int +#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ +#endif /* !defined(UINTPTR_T) */ + +/* Support for ptrdiff_t. */ +#ifndef PTRDIFF_T +#if HAVE_PTRDIFF_T || defined(ptrdiff_t) +#define PTRDIFF_T ptrdiff_t +#else +#define PTRDIFF_T long int +#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */ +#endif /* !defined(PTRDIFF_T) */ + +/* + * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99: + * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an + * unsigned type if necessary. This should work just fine in practice. + */ +#ifndef UPTRDIFF_T +#define UPTRDIFF_T PTRDIFF_T +#endif /* !defined(UPTRDIFF_T) */ + +/* + * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7). + * However, we'll simply use size_t and convert it to a signed type if + * necessary. This should work just fine in practice. + */ +#ifndef SSIZE_T +#define SSIZE_T size_t +#endif /* !defined(SSIZE_T) */ + +/* Either ERANGE or E2BIG should be available everywhere. */ +#ifndef ERANGE +#define ERANGE E2BIG +#endif /* !defined(ERANGE) */ +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif /* !defined(EOVERFLOW) */ + +/* + * Buffer size to hold the octal string representation of UINT128_MAX without + * nul-termination ("3777777777777777777777777777777777777777777"). + */ +#ifdef MAX_CONVERT_LENGTH +#undef MAX_CONVERT_LENGTH +#endif /* defined(MAX_CONVERT_LENGTH) */ +#define MAX_CONVERT_LENGTH 43 + +/* Format read states. */ +#define PRINT_S_DEFAULT 0 +#define PRINT_S_FLAGS 1 +#define PRINT_S_WIDTH 2 +#define PRINT_S_DOT 3 +#define PRINT_S_PRECISION 4 +#define PRINT_S_MOD 5 +#define PRINT_S_CONV 6 + +/* Format flags. */ +#define PRINT_F_MINUS (1 << 0) +#define PRINT_F_PLUS (1 << 1) +#define PRINT_F_SPACE (1 << 2) +#define PRINT_F_NUM (1 << 3) +#define PRINT_F_ZERO (1 << 4) +#define PRINT_F_QUOTE (1 << 5) +#define PRINT_F_UP (1 << 6) +#define PRINT_F_UNSIGNED (1 << 7) +#define PRINT_F_TYPE_G (1 << 8) +#define PRINT_F_TYPE_E (1 << 9) + +/* Conversion flags. */ +#define PRINT_C_CHAR 1 +#define PRINT_C_SHORT 2 +#define PRINT_C_LONG 3 +#define PRINT_C_LLONG 4 +#define PRINT_C_LDOUBLE 5 +#define PRINT_C_SIZE 6 +#define PRINT_C_PTRDIFF 7 +#define PRINT_C_INTMAX 8 + +#ifndef MAX +#define MAX(x, y) ((x >= y) ? x : y) +#endif /* !defined(MAX) */ +#ifndef CHARTOINT +#define CHARTOINT(ch) (ch - '0') +#endif /* !defined(CHARTOINT) */ +#ifndef ISDIGIT +#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9') +#endif /* !defined(ISDIGIT) */ +#ifndef ISNAN +#define ISNAN(x) (x != x) +#endif /* !defined(ISNAN) */ +#ifndef ISINF +#define ISINF(x) (x != 0.0 && x + x == x) +#endif /* !defined(ISINF) */ + +#ifdef OUTCHAR +#undef OUTCHAR +#endif /* defined(OUTCHAR) */ +#define OUTCHAR(str, len, size, ch) \ +do { \ + if (len + 1 < size) \ + str[len] = ch; \ + (len)++; \ +} while (/* CONSTCOND */ 0) + +static void fmtstr(char *, size_t *, size_t, const char *, int, int, int); +static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int); +static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *); +static void printsep(char *, size_t *, size_t); +static int getnumsep(int); +static int getexponent(LDOUBLE); +static int convert(UINTMAX_T, char *, size_t, int, int); +static UINTMAX_T cast(LDOUBLE); +static UINTMAX_T myround(LDOUBLE); +static LDOUBLE mypow10(int); + +int +rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + LDOUBLE fvalue; + INTMAX_T value; + unsigned char cvalue; + const char *strvalue; + INTMAX_T *intmaxptr; + PTRDIFF_T *ptrdiffptr; + SSIZE_T *sizeptr; + LLONG *llongptr; + long int *longptr; + int *intptr; + short int *shortptr; + signed char *charptr; + size_t len = 0; + int overflow = 0; + int base = 0; + int cflags = 0; + int flags = 0; + int width = 0; + int precision = -1; + int state = PRINT_S_DEFAULT; + char ch = *format++; + + /* + * C99 says: "If `n' is zero, nothing is written, and `s' may be a null + * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer + * even if a size larger than zero was specified. At least NetBSD's + * snprintf(3) does the same, as well as other versions of this file. + * (Though some of these versions will write to a non-NULL buffer even + * if a size of zero was specified, which violates the standard.) + */ + if (str == NULL && size != 0) + size = 0; + + while (ch != '\0') + switch (state) { + case PRINT_S_DEFAULT: + if (ch == '%') + state = PRINT_S_FLAGS; + else + OUTCHAR(str, len, size, ch); + ch = *format++; + break; + case PRINT_S_FLAGS: + switch (ch) { + case '-': + flags |= PRINT_F_MINUS; + ch = *format++; + break; + case '+': + flags |= PRINT_F_PLUS; + ch = *format++; + break; + case ' ': + flags |= PRINT_F_SPACE; + ch = *format++; + break; + case '#': + flags |= PRINT_F_NUM; + ch = *format++; + break; + case '0': + flags |= PRINT_F_ZERO; + ch = *format++; + break; + case '\'': /* SUSv2 flag (not in C99). */ + flags |= PRINT_F_QUOTE; + ch = *format++; + break; + default: + state = PRINT_S_WIDTH; + break; + } + break; + case PRINT_S_WIDTH: + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (width > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + width = 10 * width + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative field width argument is + * taken as a `-' flag followed by a positive + * field width." (7.19.6.1, 5) + */ + if ((width = va_arg(args, int)) < 0) { + flags |= PRINT_F_MINUS; + width = -width; + } + ch = *format++; + state = PRINT_S_DOT; + } else + state = PRINT_S_DOT; + break; + case PRINT_S_DOT: + if (ch == '.') { + state = PRINT_S_PRECISION; + ch = *format++; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_PRECISION: + if (precision == -1) + precision = 0; + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (precision > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + precision = 10 * precision + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative precision argument is + * taken as if the precision were omitted." + * (7.19.6.1, 5) + */ + if ((precision = va_arg(args, int)) < 0) + precision = -1; + ch = *format++; + state = PRINT_S_MOD; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_MOD: + switch (ch) { + case 'h': + ch = *format++; + if (ch == 'h') { /* It's a char. */ + ch = *format++; + cflags = PRINT_C_CHAR; + } else + cflags = PRINT_C_SHORT; + break; + case 'l': + ch = *format++; + if (ch == 'l') { /* It's a long long. */ + ch = *format++; + cflags = PRINT_C_LLONG; + } else + cflags = PRINT_C_LONG; + break; + case 'L': + cflags = PRINT_C_LDOUBLE; + ch = *format++; + break; + case 'j': + cflags = PRINT_C_INTMAX; + ch = *format++; + break; + case 't': + cflags = PRINT_C_PTRDIFF; + ch = *format++; + break; + case 'z': + cflags = PRINT_C_SIZE; + ch = *format++; + break; + } + state = PRINT_S_CONV; + break; + case PRINT_S_CONV: + switch (ch) { + case 'd': + /* FALLTHROUGH */ + case 'i': + switch (cflags) { + case PRINT_C_CHAR: + value = (signed char)va_arg(args, int); + break; + case PRINT_C_SHORT: + value = (short int)va_arg(args, int); + break; + case PRINT_C_LONG: + value = va_arg(args, long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, LLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, SSIZE_T); + break; + case PRINT_C_INTMAX: + value = va_arg(args, INTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, PTRDIFF_T); + break; + default: + value = va_arg(args, int); + break; + } + fmtint(str, &len, size, value, 10, width, + precision, flags); + break; + case 'X': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'x': + base = 16; + /* FALLTHROUGH */ + case 'o': + if (base == 0) + base = 8; + /* FALLTHROUGH */ + case 'u': + if (base == 0) + base = 10; + flags |= PRINT_F_UNSIGNED; + switch (cflags) { + case PRINT_C_CHAR: + value = (unsigned char)va_arg(args, + unsigned int); + break; + case PRINT_C_SHORT: + value = (unsigned short int)va_arg(args, + unsigned int); + break; + case PRINT_C_LONG: + value = va_arg(args, unsigned long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, ULLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, size_t); + break; + case PRINT_C_INTMAX: + value = va_arg(args, UINTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, UPTRDIFF_T); + break; + default: + value = va_arg(args, unsigned int); + break; + } + fmtint(str, &len, size, value, base, width, + precision, flags); + break; + case 'A': + /* Not yet supported, we'll use "%F". */ + /* FALLTHROUGH */ + case 'F': + flags |= PRINT_F_UP; + case 'a': + /* Not yet supported, we'll use "%f". */ + /* FALLTHROUGH */ + case 'f': + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'E': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'e': + flags |= PRINT_F_TYPE_E; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'G': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'g': + flags |= PRINT_F_TYPE_G; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + /* + * If the precision is zero, it is treated as + * one (cf. C99: 7.19.6.1, 8). + */ + if (precision == 0) + precision = 1; + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'c': + cvalue = (unsigned char)va_arg(args, int); + OUTCHAR(str, len, size, cvalue); + break; + case 's': + strvalue = va_arg(args, char *); + fmtstr(str, &len, size, strvalue, width, + precision, flags); + break; + case 'p': + /* + * C99 says: "The value of the pointer is + * converted to a sequence of printing + * characters, in an implementation-defined + * manner." (C99: 7.19.6.1, 8) + */ + if ((strvalue = va_arg(args, void *)) == NULL) + /* + * We use the glibc format. BSD prints + * "0x0", SysV "0". + */ + fmtstr(str, &len, size, "(nil)", width, + -1, flags); + else { + /* + * We use the BSD/glibc format. SysV + * omits the "0x" prefix (which we emit + * using the PRINT_F_NUM flag). + */ + flags |= PRINT_F_NUM; + flags |= PRINT_F_UNSIGNED; + fmtint(str, &len, size, + (UINTPTR_T)strvalue, 16, width, + precision, flags); + } + break; + case 'n': + switch (cflags) { + case PRINT_C_CHAR: + charptr = va_arg(args, signed char *); + *charptr = len; + break; + case PRINT_C_SHORT: + shortptr = va_arg(args, short int *); + *shortptr = len; + break; + case PRINT_C_LONG: + longptr = va_arg(args, long int *); + *longptr = len; + break; + case PRINT_C_LLONG: + llongptr = va_arg(args, LLONG *); + *llongptr = len; + break; + case PRINT_C_SIZE: + /* + * C99 says that with the "z" length + * modifier, "a following `n' conversion + * specifier applies to a pointer to a + * signed integer type corresponding to + * size_t argument." (7.19.6.1, 7) + */ + sizeptr = va_arg(args, SSIZE_T *); + *sizeptr = len; + break; + case PRINT_C_INTMAX: + intmaxptr = va_arg(args, INTMAX_T *); + *intmaxptr = len; + break; + case PRINT_C_PTRDIFF: + ptrdiffptr = va_arg(args, PTRDIFF_T *); + *ptrdiffptr = len; + break; + default: + intptr = va_arg(args, int *); + *intptr = len; + break; + } + break; + case '%': /* Print a "%" character verbatim. */ + OUTCHAR(str, len, size, ch); + break; + default: /* Skip other characters. */ + break; + } + ch = *format++; + state = PRINT_S_DEFAULT; + base = cflags = flags = width = 0; + precision = -1; + break; + } +out: + if (len < size) + str[len] = '\0'; + else if (size > 0) + str[size - 1] = '\0'; + + if (overflow || len >= INT_MAX) { + return -1; + } + return (int)len; +} + +static void +fmtstr(char *str, size_t *len, size_t size, const char *value, int width, + int precision, int flags) +{ + int padlen, strln; /* Amount to pad. */ + int noprecision = (precision == -1); + + if (value == NULL) /* We're forgiving. */ + value = "(null)"; + + /* If a precision was specified, don't read the string past it. */ + for (strln = 0; value[strln] != '\0' && + (noprecision || strln < precision); strln++) + continue; + + if ((padlen = width - strln) < 0) + padlen = 0; + if (flags & PRINT_F_MINUS) /* Left justify. */ + padlen = -padlen; + + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + while (*value != '\0' && (noprecision || precision-- > 0)) { + OUTCHAR(str, *len, size, *value); + value++; + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width, + int precision, int flags) +{ + UINTMAX_T uvalue; + char iconvert[MAX_CONVERT_LENGTH]; + char sign = 0; + char hexprefix = 0; + int spadlen = 0; /* Amount to space pad. */ + int zpadlen = 0; /* Amount to zero pad. */ + int pos; + int separators = (flags & PRINT_F_QUOTE); + int noprecision = (precision == -1); + + if (flags & PRINT_F_UNSIGNED) + uvalue = value; + else { + uvalue = (value >= 0) ? value : -value; + if (value < 0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + } + + pos = convert(uvalue, iconvert, sizeof(iconvert), base, + flags & PRINT_F_UP); + + if (flags & PRINT_F_NUM && uvalue != 0) { + /* + * C99 says: "The result is converted to an `alternative form'. + * For `o' conversion, it increases the precision, if and only + * if necessary, to force the first digit of the result to be a + * zero (if the value and precision are both 0, a single 0 is + * printed). For `x' (or `X') conversion, a nonzero result has + * `0x' (or `0X') prefixed to it." (7.19.6.1, 6) + */ + switch (base) { + case 8: + if (precision <= pos) + precision = pos + 1; + break; + case 16: + hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x'; + break; + } + } + + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(pos); + + zpadlen = precision - pos - separators; + spadlen = width /* Minimum field width. */ + - separators /* Number of separators. */ + - MAX(precision, pos) /* Number of integer digits. */ + - ((sign != 0) ? 1 : 0) /* Will we print a sign? */ + - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */ + + if (zpadlen < 0) + zpadlen = 0; + if (spadlen < 0) + spadlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a + * precision is specified, the `0' flag is ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justify. */ + spadlen = -spadlen; + else if (flags & PRINT_F_ZERO && noprecision) { + zpadlen += spadlen; + spadlen = 0; + } + while (spadlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + if (hexprefix != 0) { /* A "0x" or "0X" prefix. */ + OUTCHAR(str, *len, size, '0'); + OUTCHAR(str, *len, size, hexprefix); + } + while (zpadlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + zpadlen--; + } + while (pos > 0) { /* The actual digits. */ + pos--; + OUTCHAR(str, *len, size, iconvert[pos]); + if (separators > 0 && pos > 0 && pos % 3 == 0) + printsep(str, len, size); + } + while (spadlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen++; + } +} + +static void +fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width, + int precision, int flags, int *overflow) +{ + LDOUBLE ufvalue; + UINTMAX_T intpart; + UINTMAX_T fracpart; + UINTMAX_T mask; + const char *infnan = NULL; + char iconvert[MAX_CONVERT_LENGTH]; + char fconvert[MAX_CONVERT_LENGTH]; + char econvert[4]; /* "e-12" (without nul-termination). */ + char esign = 0; + char sign = 0; + int leadfraczeros = 0; + int exponent = 0; + int emitpoint = 0; + int omitzeros = 0; + int omitcount = 0; + int padlen = 0; + int epos = 0; + int fpos = 0; + int ipos = 0; + int separators = (flags & PRINT_F_QUOTE); + int estyle = (flags & PRINT_F_TYPE_E); +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + struct lconv *lc = localeconv(); +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + + /* + * AIX' man page says the default is 0, but C99 and at least Solaris' + * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX + * defaults to 6. + */ + if (precision == -1) + precision = 6; + + if (fvalue < 0.0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + + if (ISNAN(fvalue)) + infnan = (flags & PRINT_F_UP) ? "NAN" : "nan"; + else if (ISINF(fvalue)) + infnan = (flags & PRINT_F_UP) ? "INF" : "inf"; + + if (infnan != NULL) { + if (sign != 0) + iconvert[ipos++] = sign; + while (*infnan != '\0') + iconvert[ipos++] = *infnan++; + fmtstr(str, len, size, iconvert, width, ipos, flags); + return; + } + + /* "%e" (or "%E") or "%g" (or "%G") conversion. */ + if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) { + if (flags & PRINT_F_TYPE_G) { + /* + * For "%g" (and "%G") conversions, the precision + * specifies the number of significant digits, which + * includes the digits in the integer part. The + * conversion will or will not be using "e-style" (like + * "%e" or "%E" conversions) depending on the precision + * and on the exponent. However, the exponent can be + * affected by rounding the converted value, so we'll + * leave this decision for later. Until then, we'll + * assume that we're going to do an "e-style" conversion + * (in order to get the exponent calculated). For + * "e-style", the precision must be decremented by one. + */ + precision--; + /* + * For "%g" (and "%G") conversions, trailing zeros are + * removed from the fractional portion of the result + * unless the "#" flag was specified. + */ + if (!(flags & PRINT_F_NUM)) + omitzeros = 1; + } + exponent = getexponent(fvalue); + estyle = 1; + } + +again: + /* + * Sorry, we only support 9, 19, or 38 digits (that is, the number of + * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value + * minus one) past the decimal point due to our conversion method. + */ + switch (sizeof(UINTMAX_T)) { + case 16: + if (precision > 38) + precision = 38; + break; + case 8: + if (precision > 19) + precision = 19; + break; + default: + if (precision > 9) + precision = 9; + break; + } + + ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue; + if (estyle) /* We want exactly one integer digit. */ + ufvalue /= mypow10(exponent); + + if ((intpart = cast(ufvalue)) == UINTMAX_MAX) { + *overflow = 1; + return; + } + + /* + * Factor of ten with the number of digits needed for the fractional + * part. For example, if the precision is 3, the mask will be 1000. + */ + mask = (UINTMAX_T)mypow10(precision); + /* + * We "cheat" by converting the fractional part to integer by + * multiplying by a factor of ten. + */ + if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) { + /* + * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000 + * (because precision = 3). Now, myround(1000 * 0.99962) will + * return 1000. So, the integer part must be incremented by one + * and the fractional part must be set to zero. + */ + intpart++; + fracpart = 0; + if (estyle && intpart == 10) { + /* + * The value was rounded up to ten, but we only want one + * integer digit if using "e-style". So, the integer + * part must be set to one and the exponent must be + * incremented by one. + */ + intpart = 1; + exponent++; + } + } + + /* + * Now that we know the real exponent, we can check whether or not to + * use "e-style" for "%g" (and "%G") conversions. If we don't need + * "e-style", the precision must be adjusted and the integer and + * fractional parts must be recalculated from the original value. + * + * C99 says: "Let P equal the precision if nonzero, 6 if the precision + * is omitted, or 1 if the precision is zero. Then, if a conversion + * with style `E' would have an exponent of X: + * + * - if P > X >= -4, the conversion is with style `f' (or `F') and + * precision P - (X + 1). + * + * - otherwise, the conversion is with style `e' (or `E') and precision + * P - 1." (7.19.6.1, 8) + * + * Note that we had decremented the precision by one. + */ + if (flags & PRINT_F_TYPE_G && estyle && + precision + 1 > exponent && exponent >= -4) { + precision -= exponent; + estyle = 0; + goto again; + } + + if (estyle) { + if (exponent < 0) { + exponent = -exponent; + esign = '-'; + } else + esign = '+'; + + /* + * Convert the exponent. The sizeof(econvert) is 4. So, the + * econvert buffer can hold e.g. "e+99" and "e-99". We don't + * support an exponent which contains more than two digits. + * Therefore, the following stores are safe. + */ + epos = convert(exponent, econvert, 2, 10, 0); + /* + * C99 says: "The exponent always contains at least two digits, + * and only as many more digits as necessary to represent the + * exponent." (7.19.6.1, 8) + */ + if (epos == 1) + econvert[epos++] = '0'; + econvert[epos++] = esign; + econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e'; + } + + /* Convert the integer part and the fractional part. */ + ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0); + if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */ + fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0); + + leadfraczeros = precision - fpos; + + if (omitzeros) { + if (fpos > 0) /* Omit trailing fractional part zeros. */ + while (omitcount < fpos && fconvert[omitcount] == '0') + omitcount++; + else { /* The fractional part is zero, omit it completely. */ + omitcount = precision; + leadfraczeros = 0; + } + precision -= omitcount; + } + + /* + * Print a decimal point if either the fractional part is non-zero + * and/or the "#" flag was specified. + */ + if (precision > 0 || flags & PRINT_F_NUM) + emitpoint = 1; + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(ipos); + + padlen = width /* Minimum field width. */ + - ipos /* Number of integer digits. */ + - epos /* Number of exponent characters. */ + - precision /* Number of fractional digits. */ + - separators /* Number of group separators. */ + - (emitpoint ? 1 : 0) /* Will we print a decimal point? */ + - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */ + + if (padlen < 0) + padlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justifty. */ + padlen = -padlen; + else if (flags & PRINT_F_ZERO && padlen > 0) { + if (sign != 0) { /* Sign. */ + OUTCHAR(str, *len, size, sign); + sign = 0; + } + while (padlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + padlen--; + } + } + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + while (ipos > 0) { /* Integer part. */ + ipos--; + OUTCHAR(str, *len, size, iconvert[ipos]); + if (separators > 0 && ipos > 0 && ipos % 3 == 0) + printsep(str, len, size); + } + if (emitpoint) { /* Decimal point. */ +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + if (lc->decimal_point != NULL && *lc->decimal_point != '\0') + OUTCHAR(str, *len, size, *lc->decimal_point); + else /* We'll always print some decimal point character. */ +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + OUTCHAR(str, *len, size, '.'); + } + while (leadfraczeros > 0) { /* Leading fractional part zeros. */ + OUTCHAR(str, *len, size, '0'); + leadfraczeros--; + } + while (fpos > omitcount) { /* The remaining fractional part. */ + fpos--; + OUTCHAR(str, *len, size, fconvert[fpos]); + } + while (epos > 0) { /* Exponent. */ + epos--; + OUTCHAR(str, *len, size, econvert[epos]); + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +printsep(char *str, size_t *len, size_t size) +{ +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + struct lconv *lc = localeconv(); + int i; + + if (lc->thousands_sep != NULL) + for (i = 0; lc->thousands_sep[i] != '\0'; i++) + OUTCHAR(str, *len, size, lc->thousands_sep[i]); + else +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + OUTCHAR(str, *len, size, ','); +} + +static int +getnumsep(int digits) +{ + int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3; +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + int strln; + struct lconv *lc = localeconv(); + + /* We support an arbitrary separator length (including zero). */ + if (lc->thousands_sep != NULL) { + for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++) + continue; + separators *= strln; + } +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + return separators; +} + +static int +getexponent(LDOUBLE value) +{ + LDOUBLE tmp = (value >= 0.0) ? value : -value; + int exponent = 0; + + /* + * We check for 99 > exponent > -99 in order to work around possible + * endless loops which could happen (at least) in the second loop (at + * least) if we're called with an infinite value. However, we checked + * for infinity before calling this function using our ISINF() macro, so + * this might be somewhat paranoid. + */ + while (tmp < 1.0 && tmp > 0.0 && --exponent > -99) + tmp *= 10; + while (tmp >= 10.0 && ++exponent < 99) + tmp /= 10; + + return exponent; +} + +static int +convert(UINTMAX_T value, char *buf, size_t size, int base, int caps) +{ + const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef"; + size_t pos = 0; + + /* We return an unterminated buffer with the digits in reverse order. */ + do { + buf[pos++] = digits[value % base]; + value /= base; + } while (value != 0 && pos < size); + + return (int)pos; +} + +static UINTMAX_T +cast(LDOUBLE value) +{ + UINTMAX_T result; + + /* + * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be + * represented exactly as an LDOUBLE value (but is less than LDBL_MAX), + * it may be increased to the nearest higher representable value for the + * comparison (cf. C99: 6.3.1.4, 2). It might then equal the LDOUBLE + * value although converting the latter to UINTMAX_T would overflow. + */ + if (value >= UINTMAX_MAX) + return UINTMAX_MAX; + + result = (UINTMAX_T)value; + /* + * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to + * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates + * the standard). Sigh. + */ + return (result <= value) ? result : result - 1; +} + +static UINTMAX_T +myround(LDOUBLE value) +{ + UINTMAX_T intpart = cast(value); + + return ((value -= intpart) < 0.5) ? intpart : intpart + 1; +} + +static LDOUBLE +mypow10(int exponent) +{ + LDOUBLE result = 1; + + while (exponent > 0) { + result *= 10; + exponent--; + } + while (exponent < 0) { + result /= 10; + exponent++; + } + return result; +} +#endif /* !HAVE_VSNPRINTF */ + +#if !HAVE_VASPRINTF +#if NEED_MYMEMCPY +void * +mymemcpy(void *dst, void *src, size_t len) +{ + const char *from = src; + char *to = dst; + + /* No need for optimization, we use this only to replace va_copy(3). */ + while (len-- > 0) + *to++ = *from++; + return dst; +} +#endif /* NEED_MYMEMCPY */ + +int +rpl_vasprintf(char **ret, const char *format, va_list ap) +{ + size_t size; + int len; + va_list aq; + + VA_COPY(aq, ap); + len = vsnprintf(NULL, 0, format, aq); + VA_END_COPY(aq); + if (len < 0 || (*ret = malloc(size = len + 1)) == NULL) + return -1; + return vsnprintf(*ret, size, format, ap); +} +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_SNPRINTF +#if HAVE_STDARG_H +int +rpl_snprintf(char *str, size_t size, const char *format, ...) +#else +int +rpl_snprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char *str; + size_t size; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, str, char *); + VA_SHIFT(ap, size, size_t); + VA_SHIFT(ap, format, const char *); + len = vsnprintf(str, size, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_SNPRINTF */ + +#if !HAVE_ASPRINTF +#if HAVE_STDARG_H +int +rpl_asprintf(char **ret, const char *format, ...) +#else +int +rpl_asprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char **ret; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, ret, char **); + VA_SHIFT(ap, format, const char *); + len = vasprintf(ret, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_ASPRINTF */ +#else /* Dummy declaration to avoid empty translation unit warnings. */ +int main(void); +#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */ + + +/* vim: set joinspaces textwidth=80: */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index bf27289f3f..c568922cbd 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -63,6 +63,7 @@ struct cell_context const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + uint num_samplers; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; @@ -75,6 +76,7 @@ struct cell_context struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + uint num_textures; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 075e0a0c47..025ed3bbbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -210,15 +210,19 @@ cell_create_sampler_state(struct pipe_context *pipe, static void -cell_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +cell_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **samplers) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); assert(unit < PIPE_MAX_SAMPLERS); - cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + memcpy(cell->sampler, samplers, num * sizeof(void *)); + memset(&cell->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + cell->num_samplers = num; cell->dirty |= CELL_NEW_SAMPLER; } @@ -234,16 +238,24 @@ cell_delete_sampler_state(struct pipe_context *pipe, static void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) +cell_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct cell_context *cell = cell_context(pipe); + uint i; + + /* Check for no-op */ + if (num == cell->num_textures && + !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *))) + return; draw_flush(cell->draw); - pipe_texture_reference((struct pipe_texture **) &cell->texture[sampler], - texture); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex); + } cell_update_texture_mapping(cell); @@ -300,10 +312,10 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.delete_blend_state = cell_delete_blend_state; cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.bind_sampler_states = cell_bind_sampler_states; cell->pipe.delete_sampler_state = cell_delete_sampler_state; - cell->pipe.set_sampler_texture = cell_set_sampler_texture; + cell->pipe.set_sampler_textures = cell_set_sampler_textures; cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 49c0d130c5..670eb26bdd 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -77,8 +77,10 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_SAMPLER) { - emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, - cell->sampler[0], sizeof(struct pipe_sampler_state)); + if (cell->sampler[0]) { + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } } if (cell->dirty & CELL_NEW_TEXTURE) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 935501441b..269a5c15ba 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -131,7 +131,8 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) cell->vs = (const struct cell_vertex_shader_state *) vs; - draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + draw_bind_vertex_shader(cell->draw, + (cell->vs ? cell->vs->draw_data : NULL)); cell->dirty |= CELL_NEW_VS; } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 9cf74bab47..4828a8023b 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -33,46 +33,11 @@ #include "cell_context.h" #include "rtasm/rtasm_ppc_spe.h" -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - /** * Emit a 4x4 matrix transpose operation * * \param p Function that the transpose operation is to be appended to - * \param m Live register mask * \param row0 Register containing row 0 of the source matrix * \param row1 Register containing row 1 of the source matrix * \param row2 Register containing row 2 of the source matrix @@ -91,15 +56,15 @@ void release_register(register_mask *m, unsigned reg) * This function requires that four temporary are available on entry. */ static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, +emit_matrix_transpose(struct spe_function *p, unsigned row0, unsigned row1, unsigned row2, unsigned row3, unsigned dest_ptr, unsigned shuf_ptr, unsigned count) { - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); int t3; int t4; int col0; @@ -169,19 +134,19 @@ emit_matrix_transpose(struct spe_function *p, register_mask *m, /* Release all of the temporary registers used. */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); } static void -emit_fetch(struct spe_function *p, register_mask *m, +emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, unsigned out_ptr, unsigned shuf_ptr, enum pipe_format format) @@ -191,11 +156,11 @@ emit_fetch(struct spe_function *p, register_mask *m, const unsigned type = pf_type(format); const unsigned bytes = pf_size_x(format); - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); int float_zero = -1; int float_one = -1; float scale_signed = 0.0; @@ -260,19 +225,19 @@ emit_fetch(struct spe_function *p, register_mask *m, if (count < 4) { - float_one = allocate_available_register(m); + float_one = spe_allocate_available_register(p); spe_il(p, float_one, 1); spe_cuflt(p, float_one, float_one, 0); if (count < 3) { - float_zero = allocate_available_register(m); + float_zero = spe_allocate_available_register(p); spe_il(p, float_zero, 0); } } - release_register(m, tmp); + spe_release_register(p, tmp); - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); switch (count) { case 1: @@ -284,11 +249,11 @@ emit_fetch(struct spe_function *p, register_mask *m, } if (float_zero != -1) { - release_register(m, float_zero); + spe_release_register(p, float_zero); } if (float_one != -1) { - release_register(m, float_one); + spe_release_register(p, float_one); } } @@ -297,7 +262,6 @@ void cell_update_vertex_fetch(struct draw_context *draw) { struct cell_context *const cell = (struct cell_context *) draw->driver_private; - register_mask m = ~0; struct spe_function *p = &cell->attrib_fetch; unsigned function_index[PIPE_ATTRIB_MAX]; unsigned unique_attr_formats; @@ -338,18 +302,11 @@ void cell_update_vertex_fetch(struct draw_context *draw) spe_init_func(p, 136 * unique_attr_formats); - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - /* Allocate registers for the function's input parameters. */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); /* Generate code for the individual attribute fetch functions. @@ -362,7 +319,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) - (void *) p->store); offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, draw->vertex_element[i].src_format); spe_bi(p, 0, 0, 0); diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 1dc87291c9..8f3ad3ee79 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -87,12 +87,15 @@ struct failover_context { struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + void *sw_sampler_state[PIPE_MAX_SAMPLERS]; + void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + unsigned dirty; - unsigned dirty_sampler; - unsigned dirty_texture; unsigned dirty_vertex_buffer; unsigned dirty_vertex_element; + unsigned num_samplers; + unsigned num_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 0fc5568da1..11eec2714e 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ +#include "pipe/p_inlines.h" + #include "fo_context.h" @@ -322,18 +324,27 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +failover_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct failover_context *failover = failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; - failover->sampler[unit] = state; + uint i; + assert(num <= PIPE_MAX_SAMPLERS); + /* Check for no-op */ + if (num == failover->num_samplers && + !memcmp(failover->sampler, sampler, num * sizeof(void *))) + return; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; + failover->hw_sampler_state[i] = i < num ? state[i].hw_state : NULL; + } failover->dirty |= FO_NEW_SAMPLER; - failover->dirty_sampler |= (1<<unit); - failover->sw->bind_sampler_state(failover->sw, unit, - state->sw_state); - failover->hw->bind_sampler_state(failover->hw, unit, - state->hw_state); + failover->num_samplers = num; + failover->sw->bind_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_sampler_states(failover->hw, num, + failover->hw_sampler_state); } static void @@ -351,17 +362,29 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) +failover_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); - - failover->texture[unit] = texture; + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == failover->num_textures && + !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) + return; + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + texture[i]); + for (i = num; i < failover->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + NULL); failover->dirty |= FO_NEW_TEXTURE; - failover->dirty_texture |= (1<<unit); - failover->sw->set_sampler_texture( failover->sw, unit, texture ); - failover->hw->set_sampler_texture( failover->hw, unit, texture ); + failover->num_textures = num; + failover->sw->set_sampler_textures( failover->sw, num, texture ); + failover->hw->set_sampler_textures( failover->hw, num, texture ); } @@ -429,7 +452,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_state = failover_bind_sampler_state; + failover->pipe.bind_sampler_states = failover_bind_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -449,7 +472,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_texture = failover_set_sampler_texture; + failover->pipe.set_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; failover->pipe.set_vertex_element = failover_set_vertex_element; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index c663dd4947..3de931e04e 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -94,21 +94,13 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_sampler & (1<<i)) { - failover->sw->bind_sampler_state( failover->sw, i, - failover->sampler[i]->sw_state ); - } - } + failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); } if (failover->dirty & FO_NEW_TEXTURE) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_texture & (1<<i)) { - failover->sw->set_sampler_texture( failover->sw, i, - failover->texture[i] ); - } - } + failover->sw->set_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { @@ -132,6 +124,4 @@ failover_state_emit( struct failover_context *failover ) failover->dirty = 0; failover->dirty_vertex_element = 0; failover->dirty_vertex_buffer = 0; - failover->dirty_texture = 0; - failover->dirty_sampler = 0; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 6401112f83..746f18ba38 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -1,336 +1,339 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/util/tgsi_scan.h" - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; -}; - - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context pipe; - struct i915_winsys *winsys; - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - - unsigned dirty; - - unsigned *batch_start; - - /** Vertex buffer */ - struct pipe_buffer *vbo; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<<I915_CACHE_STATIC) -#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) -#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) -#define I915_HW_MAP (1<<I915_CACHE_MAP) -#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) -#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) -#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) - - -/*********************************************************************** - * i915_prim_emit.c: - */ -struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_prim_vbuf.c: - */ -struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_state_emit.c: - */ -void i915_emit_hardware_state(struct i915_context *i915 ); - - - -/*********************************************************************** - * i915_clear.c: - */ -void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - - -/*********************************************************************** - * i915_surface.c: - */ -void i915_init_surface_functions( struct i915_context *i915 ); - -void i915_init_state_functions( struct i915_context *i915 ); -void i915_init_flush_functions( struct i915_context *i915 ); -void i915_init_string_functions( struct i915_context *i915 ); - - - - -/*********************************************************************** - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static INLINE struct i915_context * -i915_context( struct pipe_context *pipe ) -{ - return (struct i915_context *)pipe; -} - - - -#endif + /**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_CONTEXT_H
+#define I915_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "draw/draw_vertex.h"
+
+#include "tgsi/util/tgsi_scan.h"
+
+
+#define I915_TEX_UNITS 8
+
+#define I915_DYNAMIC_MODES4 0
+#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */
+#define I915_DYNAMIC_DEPTHSCALE_1 2
+#define I915_DYNAMIC_IAB 3
+#define I915_DYNAMIC_BC_0 4 /* just the header */
+#define I915_DYNAMIC_BC_1 5
+#define I915_DYNAMIC_BFO_0 6
+#define I915_DYNAMIC_BFO_1 7
+#define I915_DYNAMIC_STP_0 8
+#define I915_DYNAMIC_STP_1 9
+#define I915_DYNAMIC_SC_ENA_0 10
+#define I915_DYNAMIC_SC_RECT_0 11
+#define I915_DYNAMIC_SC_RECT_1 12
+#define I915_DYNAMIC_SC_RECT_2 13
+#define I915_MAX_DYNAMIC 14
+
+
+#define I915_IMMEDIATE_S0 0
+#define I915_IMMEDIATE_S1 1
+#define I915_IMMEDIATE_S2 2
+#define I915_IMMEDIATE_S3 3
+#define I915_IMMEDIATE_S4 4
+#define I915_IMMEDIATE_S5 5
+#define I915_IMMEDIATE_S6 6
+#define I915_IMMEDIATE_S7 7
+#define I915_MAX_IMMEDIATE 8
+
+/* These must mach the order of LI0_STATE_* bits, as they will be used
+ * to generate hardware packets:
+ */
+#define I915_CACHE_STATIC 0
+#define I915_CACHE_DYNAMIC 1 /* handled specially */
+#define I915_CACHE_SAMPLER 2
+#define I915_CACHE_MAP 3
+#define I915_CACHE_PROGRAM 4
+#define I915_CACHE_CONSTANTS 5
+#define I915_MAX_CACHE 6
+
+#define I915_MAX_CONSTANT 32
+
+
+/** See constant_flags[] below */
+#define I915_CONSTFLAG_USER 0x1f
+
+
+/**
+ * Subclass of pipe_shader_state
+ */
+struct i915_fragment_shader
+{
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+
+ uint *program;
+ uint program_len;
+
+ /**
+ * constants introduced during translation.
+ * These are placed at the end of the constant buffer and grow toward
+ * the beginning (eg: slot 31, 30 29, ...)
+ * User-provided constants start at 0.
+ * This allows both types of constants to co-exist (until there's too many)
+ * and doesn't require regenerating/changing the fragment program to
+ * shuffle constants around.
+ */
+ uint num_constants;
+ float constants[I915_MAX_CONSTANT][4];
+
+ /**
+ * Status of each constant
+ * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding
+ * slot of the user's constant buffer. (set by pipe->set_constant_buffer())
+ * Else, the bitmask indicates which components are occupied by immediates.
+ */
+ ubyte constant_flags[I915_MAX_CONSTANT];
+};
+
+
+struct i915_cache_context;
+
+/* Use to calculate differences between state emitted to hardware and
+ * current driver-calculated state.
+ */
+struct i915_state
+{
+ unsigned immediate[I915_MAX_IMMEDIATE];
+ unsigned dynamic[I915_MAX_DYNAMIC];
+
+ float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
+ /** number of constants passed in through a constant buffer */
+ uint num_user_constants[PIPE_SHADER_TYPES];
+
+ /* texture sampler state */
+ unsigned sampler[I915_TEX_UNITS][3];
+ unsigned sampler_enable_flags;
+ unsigned sampler_enable_nr;
+
+ /* texture image buffers */
+ unsigned texbuffer[I915_TEX_UNITS][2];
+
+ /** Describes the current hardware vertex layout */
+ struct vertex_info vertex_info;
+
+ unsigned id; /* track lost context events */
+};
+
+struct i915_blend_state {
+ unsigned iab;
+ unsigned modes4;
+ unsigned LIS5;
+ unsigned LIS6;
+};
+
+struct i915_depth_stencil_state {
+ unsigned stencil_modes4;
+ unsigned bfo[2];
+ unsigned stencil_LIS5;
+ unsigned depth_LIS6;
+};
+
+struct i915_rasterizer_state {
+ int light_twoside : 1;
+ unsigned st;
+ enum interp_mode color_interp;
+
+ unsigned LIS4;
+ unsigned LIS7;
+ unsigned sc[1];
+
+ const struct pipe_rasterizer_state *templ;
+
+ union { float f; unsigned u; } ds[2];
+};
+
+struct i915_sampler_state {
+ unsigned state[3];
+ const struct pipe_sampler_state *templ;
+};
+
+
+struct i915_texture {
+ struct pipe_texture base;
+
+ /* Derived from the above:
+ */
+ unsigned pitch;
+ unsigned depth_pitch; /* per-image on i945? */
+ unsigned total_height;
+
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Explicitly store the offset of each image for each cube face or
+ * depth value. Pretty much have to accept that hardware formats
+ * are going to be so diverse that there is no unified way to
+ * compute the offsets of depth/cube images within a mipmap level,
+ * so have to store them as a lookup table:
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */
+
+ /* Includes image offset tables:
+ */
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+};
+
+struct i915_context
+{
+ struct pipe_context pipe;
+ struct i915_winsys *winsys;
+ struct draw_context *draw;
+
+ /* The most recent drawing state as set by the driver:
+ */
+ const struct i915_blend_state *blend;
+ const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct i915_depth_stencil_state *depth_stencil;
+ const struct i915_rasterizer_state *rasterizer;
+
+ struct i915_fragment_shader *fs;
+
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct i915_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
+
+ unsigned dirty;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+
+ unsigned *batch_start;
+
+ /** Vertex buffer */
+ struct pipe_buffer *vbo;
+
+ struct i915_state current;
+ unsigned hardware_dirty;
+
+ unsigned debug;
+};
+
+/* A flag for each state_tracker state object:
+ */
+#define I915_NEW_VIEWPORT 0x1
+#define I915_NEW_RASTERIZER 0x2
+#define I915_NEW_FS 0x4
+#define I915_NEW_BLEND 0x8
+#define I915_NEW_CLIP 0x10
+#define I915_NEW_SCISSOR 0x20
+#define I915_NEW_STIPPLE 0x40
+#define I915_NEW_FRAMEBUFFER 0x80
+#define I915_NEW_ALPHA_TEST 0x100
+#define I915_NEW_DEPTH_STENCIL 0x200
+#define I915_NEW_SAMPLER 0x400
+#define I915_NEW_TEXTURE 0x800
+#define I915_NEW_CONSTANTS 0x1000
+#define I915_NEW_VBO 0x2000
+#define I915_NEW_VS 0x4000
+
+
+/* Driver's internally generated state flags:
+ */
+#define I915_NEW_VERTEX_FORMAT 0x10000
+
+
+/* Dirty flags for hardware emit
+ */
+#define I915_HW_STATIC (1<<I915_CACHE_STATIC)
+#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC)
+#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER)
+#define I915_HW_MAP (1<<I915_CACHE_MAP)
+#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM)
+#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS)
+#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0))
+#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1))
+
+
+/***********************************************************************
+ * i915_prim_emit.c:
+ */
+struct draw_stage *i915_draw_render_stage( struct i915_context *i915 );
+
+
+/***********************************************************************
+ * i915_prim_vbuf.c:
+ */
+struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 );
+
+
+/***********************************************************************
+ * i915_state_emit.c:
+ */
+void i915_emit_hardware_state(struct i915_context *i915 );
+
+
+
+/***********************************************************************
+ * i915_clear.c:
+ */
+void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+
+/***********************************************************************
+ * i915_surface.c:
+ */
+void i915_init_surface_functions( struct i915_context *i915 );
+
+void i915_init_state_functions( struct i915_context *i915 );
+void i915_init_flush_functions( struct i915_context *i915 );
+void i915_init_string_functions( struct i915_context *i915 );
+
+
+
+
+/***********************************************************************
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct i915_context *
+i915_context( struct pipe_context *pipe )
+{
+ return (struct i915_context *)pipe;
+}
+
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index c2d9a93c6c..7b4fca5db1 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -872,19 +872,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide - == TGSI_EXTSWIZZLE_W) { - emit_tex(p, inst, T0_TEXLDP); - } - else { - emit_tex(p, inst, T0_TEXLD); - } + emit_tex(p, inst, T0_TEXLD); break; case TGSI_OPCODE_TXB: emit_tex(p, inst, T0_TEXLDB); break; + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + case TGSI_OPCODE_XPD: /* Cross product: * result.x = src0.y * src1.z - src0.z * src1.y; diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 27af46bea0..503c092400 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -71,6 +71,8 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -84,7 +86,7 @@ static unsigned translate_mip_filter( unsigned filter ) return MIPFILTER_NONE; case PIPE_TEX_MIPFILTER_NEAREST: return MIPFILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_MIPFILTER_LINEAR: return MIPFILTER_LINEAR; default: assert(0); @@ -211,16 +213,11 @@ i915_create_sampler_state(struct pipe_context *pipe, cso->templ = sampler; mipFilt = translate_mip_filter(sampler->min_mip_filter); - if (sampler->max_anisotropy > 1.0) { - minFilt = FILTER_ANISOTROPIC; - magFilt = FILTER_ANISOTROPIC; - if (sampler->max_anisotropy > 2.0) { - cso->state[0] |= SS2_MAX_ANISO_4; - } - } - else { - minFilt = translate_img_filter( sampler->min_img_filter ); - magFilt = translate_img_filter( sampler->mag_img_filter ); + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; } { @@ -269,13 +266,25 @@ i915_create_sampler_state(struct pipe_context *pipe, return cso; } -static void i915_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +static void i915_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct i915_context *i915 = i915_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; - assert(unit < PIPE_MAX_SAMPLERS); - i915->sampler[unit] = (const struct i915_sampler_state*)sampler; + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; + + i915->num_samplers = num; i915->dirty |= I915_NEW_SAMPLER; } @@ -526,14 +535,29 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, } -static void i915_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); - pipe_texture_reference((struct pipe_texture **) &i915->texture[sampler], - texture); + i915->num_textures = num; i915->dirty |= I915_NEW_TEXTURE; } @@ -644,22 +668,23 @@ i915_create_rasterizer_state(struct pipe_context *pipe, } static void i915_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) + void *raster ) { struct i915_context *i915 = i915_context(pipe); - i915->rasterizer = (struct i915_rasterizer_state *)setup; + i915->rasterizer = (struct i915_rasterizer_state *)raster; /* pass-through to draw module */ - draw_set_rasterizer_state(i915->draw, i915->rasterizer->templ); + draw_set_rasterizer_state(i915->draw, + (i915->rasterizer ? i915->rasterizer->templ : NULL)); i915->dirty |= I915_NEW_RASTERIZER; } static void i915_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *raster) { - FREE(setup); + FREE(raster); } static void i915_set_vertex_buffer( struct pipe_context *pipe, @@ -691,7 +716,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.delete_blend_state = i915_delete_blend_state; i915->pipe.create_sampler_state = i915_create_sampler_state; - i915->pipe.bind_sampler_state = i915_bind_sampler_state; + i915->pipe.bind_sampler_states = i915_bind_sampler_states; i915->pipe.delete_sampler_state = i915_delete_sampler_state; i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -715,7 +740,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; i915->pipe.set_scissor_state = i915_set_scissor_state; - i915->pipe.set_sampler_texture = i915_set_sampler_texture; + i915->pipe.set_sampler_textures = i915_set_sampler_textures; i915->pipe.set_viewport_state = i915_set_viewport_state; i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; i915->pipe.set_vertex_element = i915_set_vertex_element; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 6bbaac4e34..a7498d22b7 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -267,12 +267,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) { - /* XXX: we were refering to sampler state - * (current.sampler_enable_nr) below, but only checking - * I915_HW_MAP above. Should probably calculate the enabled - * flags separately - but there will be further rework of - * state so perhaps not necessary yet. - */ const uint nr = i915->current.sampler_enable_nr; if (nr) { const uint enabled = i915->current.sampler_enable_flags; diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 9c1a5bbbd6..9dbb1b1b23 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -106,7 +106,8 @@ void i915_update_samplers( struct i915_context *i915 ) i915->current.sampler_enable_nr = 0; i915->current.sampler_enable_flags = 0x0; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { @@ -219,7 +220,8 @@ i915_update_textures(struct i915_context *i915) { uint unit; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 4da3a8cffc..b83a13c3b6 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -1,681 +1,684 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/util/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) -#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) -#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) -#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) -#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) -#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) -#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) -#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) -#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) -#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) -#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) -#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) -#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) -#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) -#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - -struct brw_cached_batch_item { - struct header *header; - unsigned sz; - struct brw_cached_batch_item *next; -}; - - - -/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; - - struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - +/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "tgsi/util/tgsi_scan.h"
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawining a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contigous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitary
+ * computation and emit whatever primtives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by latter units, including Quads and Lineloops.
+ *
+ * CS - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes descisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_MAX_CURBE (32*16)
+
+struct brw_context;
+struct brw_winsys;
+
+
+/* Raised when we receive new state across the pipe interface:
+ */
+#define BRW_NEW_VIEWPORT 0x1
+#define BRW_NEW_RASTERIZER 0x2
+#define BRW_NEW_FS 0x4
+#define BRW_NEW_BLEND 0x8
+#define BRW_NEW_CLIP 0x10
+#define BRW_NEW_SCISSOR 0x20
+#define BRW_NEW_STIPPLE 0x40
+#define BRW_NEW_FRAMEBUFFER 0x80
+#define BRW_NEW_ALPHA_TEST 0x100
+#define BRW_NEW_DEPTH_STENCIL 0x200
+#define BRW_NEW_SAMPLER 0x400
+#define BRW_NEW_TEXTURE 0x800
+#define BRW_NEW_CONSTANTS 0x1000
+#define BRW_NEW_VBO 0x2000
+#define BRW_NEW_VS 0x4000
+
+/* Raised for other internal events:
+ */
+#define BRW_NEW_URB_FENCE 0x10000
+#define BRW_NEW_PSP 0x20000
+#define BRW_NEW_CURBE_OFFSETS 0x40000
+#define BRW_NEW_REDUCED_PRIMITIVE 0x80000
+#define BRW_NEW_PRIMITIVE 0x100000
+#define BRW_NEW_SCENE 0x200000
+#define BRW_NEW_SF_LINKAGE 0x400000
+
+extern int BRW_DEBUG;
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_PRIMS 0x8
+#define DEBUG_VERTS 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_DRI 0x80
+#define DEBUG_DMA 0x100
+#define DEBUG_SANITY 0x200
+#define DEBUG_SYNC 0x400
+#define DEBUG_SLEEP 0x800
+#define DEBUG_PIXEL 0x1000
+#define DEBUG_STATS 0x2000
+#define DEBUG_TILE 0x4000
+#define DEBUG_SINGLE_THREAD 0x8000
+#define DEBUG_WM 0x10000
+#define DEBUG_URB 0x20000
+#define DEBUG_VS 0x40000
+#define DEBUG_BATCH 0x80000
+#define DEBUG_BUFMGR 0x100000
+#define DEBUG_BLIT 0x200000
+#define DEBUG_REGION 0x400000
+#define DEBUG_MIPTREE 0x800000
+
+#define DBG(...) do { \
+ if (BRW_DEBUG & FILE_DEBUG_FLAG) \
+ brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \
+} while(0)
+
+#define PRINT(...) do { \
+ brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \
+} while(0)
+
+struct brw_state_flags {
+ unsigned cache;
+ unsigned brw;
+};
+
+
+struct brw_vertex_program {
+ struct pipe_shader_state program;
+ struct tgsi_shader_info info;
+ int id;
+};
+
+
+struct brw_fragment_program {
+ struct pipe_shader_state program;
+ struct tgsi_shader_info info;
+
+ boolean UsesDepth; /* XXX add this to tgsi_shader_info? */
+ int id;
+};
+
+
+struct pipe_setup_linkage {
+ struct {
+ unsigned vp_output:5;
+ unsigned interp_mode:4;
+ unsigned bf_vp_output:5;
+ } fp_input[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned fp_input_count:5;
+ unsigned max_vp_output:5;
+};
+
+
+
+struct brw_texture {
+ struct pipe_texture base;
+
+ /* Derived from the above:
+ */
+ unsigned pitch;
+ unsigned depth_pitch; /* per-image on i945? */
+ unsigned total_height;
+
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Explicitly store the offset of each image for each cube face or
+ * depth value. Pretty much have to accept that hardware formats
+ * are going to be so diverse that there is no unified way to
+ * compute the offsets of depth/cube images within a mipmap level,
+ * so have to store them as a lookup table:
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */
+
+ /* Includes image offset tables:
+ */
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+
+struct brw_wm_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+
+ unsigned first_curbe_grf;
+ unsigned total_grf;
+ unsigned total_scratch;
+
+ /* Internally generated constants for the CURBE. These are loaded
+ * ahead of the data from the constant buffer.
+ */
+ const float internal_const[8];
+ unsigned nr_internal_consts;
+ unsigned max_const;
+
+ boolean error;
+};
+
+struct brw_sf_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+
+ /* Each vertex may have upto 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ unsigned urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+ unsigned curb_read_length; /* user planes? */
+ unsigned clip_mode;
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_gs_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_vs_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+ unsigned total_grf;
+ unsigned outputs_written;
+
+ unsigned inputs_read;
+
+ unsigned max_const;
+
+ float imm_buf[PIPE_MAX_CONSTANT][4];
+ unsigned num_imm;
+ unsigned num_consts;
+
+ /* Used for calculating urb partitions:
+ */
+ unsigned urb_entry_size;
+};
+
+
+#define BRW_MAX_TEX_UNIT 8
+#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
+
+/* Create a fixed sized struct for caching binding tables:
+ */
+struct brw_surface_binding_table {
+ unsigned surf_ss_offset[BRW_WM_MAX_SURF];
+};
+
+
+struct brw_cache;
+
+struct brw_mem_pool {
+ struct pipe_buffer *buffer;
+
+ unsigned size;
+ unsigned offset; /* offset of first free byte */
+
+ struct brw_context *brw;
+};
+
+struct brw_cache_item {
+ unsigned hash;
+ unsigned key_size; /* for variable-sized keys */
+ const void *key;
+
+ unsigned offset; /* offset within pool's buffer */
+ unsigned data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ unsigned id;
+
+ const char *name;
+
+ struct brw_context *brw;
+ struct brw_mem_pool *pool;
+
+ struct brw_cache_item **items;
+ unsigned size, n_items;
+
+ unsigned key_size; /* for fixed-size keys */
+ unsigned aux_size;
+
+ unsigned last_addr; /* offset of active item */
+};
+
+
+
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime. Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ void (*update)( struct brw_context *brw );
+};
+
+
+/* Flags for brw->state.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+
+
+
+enum brw_mempool_id {
+ BRW_GS_POOL,
+ BRW_SS_POOL,
+ BRW_MAX_POOL
+};
+
+
+struct brw_cached_batch_item {
+ struct header *header;
+ unsigned sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32)
+
+
+
+
+struct brw_vertex_info {
+ unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */
+ unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */
+};
+
+
+
+
+
+struct brw_context
+{
+ struct pipe_context pipe;
+ struct brw_winsys *winsys;
+
+ unsigned primitive;
+ unsigned reduced_primitive;
+
+ boolean emit_state_always;
+
+ struct {
+ struct brw_state_flags dirty;
+ } state;
+
+
+ struct {
+ const struct pipe_blend_state *Blend;
+ const struct pipe_depth_stencil_alpha_state *DepthStencil;
+ const struct pipe_poly_stipple *PolygonStipple;
+ const struct pipe_rasterizer_state *Raster;
+ const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS];
+ const struct brw_vertex_program *VertexProgram;
+ const struct brw_fragment_program *FragmentProgram;
+
+ struct pipe_clip_state Clip;
+ struct pipe_blend_color BlendColor;
+ struct pipe_scissor_state Scissor;
+ struct pipe_viewport_state Viewport;
+ struct pipe_framebuffer_state FrameBuffer;
+
+ const struct pipe_constant_buffer *Constants[2];
+ const struct brw_texture *Texture[PIPE_MAX_SAMPLERS];
+ } attribs;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+
+ struct brw_mem_pool pool[BRW_MAX_POOL];
+ struct brw_cache cache[BRW_MAX_CACHE];
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+
+ /* Arrays with buffer objects to copy non-bufferobj arrays into
+ * for upload:
+ */
+ const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX];
+
+ struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX];
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+ /* Summary of size and varying of active arrays, so we can check
+ * for changes to this state:
+ */
+ struct brw_vertex_info info;
+ } vb;
+
+
+ unsigned hardware_dirty;
+ unsigned dirty;
+ unsigned pci_id;
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ unsigned vsize; /* vertex size plus header in urb registers */
+ unsigned csize; /* constant buffer size in urb registers */
+ unsigned sfsize; /* setup data size in urb registers */
+
+ boolean constrained;
+
+ unsigned nr_vs_entries;
+ unsigned nr_gs_entries;
+ unsigned nr_clip_entries;
+ unsigned nr_sf_entries;
+ unsigned nr_cs_entries;
+
+/* unsigned vs_size; */
+/* unsigned gs_size; */
+/* unsigned clip_size; */
+/* unsigned sf_size; */
+/* unsigned cs_size; */
+
+ unsigned vs_start;
+ unsigned gs_start;
+ unsigned clip_start;
+ unsigned sf_start;
+ unsigned cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ unsigned wm_start;
+ unsigned wm_size;
+ unsigned clip_start;
+ unsigned clip_size;
+ unsigned vs_start;
+ unsigned vs_size;
+ unsigned total_size;
+
+ unsigned gs_offset;
+
+ float *last_buf;
+ unsigned last_bufsz;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ boolean prog_active;
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ struct pipe_setup_linkage linkage;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+
+// struct brw_wm_compiler *compile_data;
+
+
+ /**
+ * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
+ * cache
+ */
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+ unsigned render_surf;
+ unsigned nr_surfaces;
+
+ unsigned max_threads;
+ struct pipe_buffer *scratch_buffer;
+ unsigned scratch_buffer_size;
+
+ unsigned sampler_count;
+ unsigned sampler_gs_offset;
+
+ struct brw_surface_binding_table bind;
+ unsigned bind_ss_offset;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } wm;
+
+
+ struct {
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } cc;
+
+
+ /* Used to give every program string a unique id
+ */
+ unsigned program_id;
+};
+
+
+#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brw_do_flush( struct brw_context *brw,
+ unsigned flags );
+
+
+/*======================================================================
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brwUpdateTextureState( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+void brw_upload_constant_buffer_state(struct brw_context *brw);
+
+void brw_init_surface_functions(struct brw_context *brw);
+void brw_init_state_functions(struct brw_context *brw);
+void brw_init_flush_functions(struct brw_context *brw);
+void brw_init_string_functions(struct brw_context *brw);
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static inline struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+ return (struct brw_context *)ctx;
+}
+
+#endif
+
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 6744a8aa4f..f5efe9fc06 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -1,434 +1,462 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Samplers[unit] = sampler; - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_fp->program = *shader; - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_vp->program = *shader; - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) -{ - struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = buffer; -} - -static void brw_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - - assert(index < PIPE_ATTRIB_MAX); - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = element->src_offset; - el.ve0.src_format = brw_translate_surface_format(element->src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = element->vertex_buffer_index; - - el.ve1.dst_offset = index * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (element->nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[index] = el; -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct brw_context *brw = brw_context(pipe); - - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[unit], - texture); - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_state = brw_bind_sampler_state; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_texture = brw_set_sampler_texture; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; - brw->pipe.set_vertex_element = brw_set_vertex_element; -} +/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Zack Rusin <zack@tungstengraphics.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_draw.h"
+
+
+#define DUP( TYPE, VAL ) \
+do { \
+ struct TYPE *x = malloc(sizeof(*x)); \
+ memcpy(x, VAL, sizeof(*x) ); \
+ return x; \
+} while (0)
+
+/************************************************************************
+ * Blend
+ */
+static void *
+brw_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ DUP( pipe_blend_state, blend );
+}
+
+static void brw_bind_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Blend = (struct pipe_blend_state*)blend;
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+
+static void brw_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ free(blend);
+}
+
+static void brw_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.BlendColor = *blend_color;
+
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+/************************************************************************
+ * Sampler
+ */
+
+static void *
+brw_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ DUP( pipe_sampler_state, sampler );
+}
+
+static void brw_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == brw->num_samplers &&
+ !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *)))
+ return;
+
+ memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *));
+ memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) *
+ sizeof(void *));
+
+ brw->num_samplers = num;
+
+ brw->state.dirty.brw |= BRW_NEW_SAMPLER;
+}
+
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ free(sampler);
+}
+
+
+/************************************************************************
+ * Depth stencil
+ */
+
+static void *
+brw_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ DUP( pipe_depth_stencil_alpha_state, depth_stencil );
+}
+
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+
+ brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL;
+}
+
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ free(depth_stencil);
+}
+
+/************************************************************************
+ * Scissor
+ */
+static void brw_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) );
+ brw->state.dirty.brw |= BRW_NEW_SCISSOR;
+}
+
+
+/************************************************************************
+ * Stipple
+ */
+
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+}
+
+
+/************************************************************************
+ * Fragment shader
+ */
+
+static void * brw_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program);
+
+ /* XXX: Do I have to duplicate the tokens as well??
+ */
+ brw_fp->program = *shader;
+ brw_fp->id = brw_context(pipe)->program_id++;
+
+ tgsi_scan_shader(shader->tokens, &brw_fp->info);
+
+#if 0
+ brw_shader_info(shader->tokens,
+ &brw_fp->info2);
+#endif
+
+ tgsi_dump(shader->tokens, 0);
+
+
+ return (void *)brw_fp;
+}
+
+static void brw_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader;
+ brw->state.dirty.brw |= BRW_NEW_FS;
+}
+
+static void brw_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+ FREE(shader);
+}
+
+
+/************************************************************************
+ * Vertex shader and other TNL state
+ */
+
+static void *brw_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program);
+
+ /* XXX: Do I have to duplicate the tokens as well??
+ */
+ brw_vp->program = *shader;
+ brw_vp->id = brw_context(pipe)->program_id++;
+
+ tgsi_scan_shader(shader->tokens, &brw_vp->info);
+
+#if 0
+ brw_shader_info(shader->tokens,
+ &brw_vp->info2);
+#endif
+ tgsi_dump(shader->tokens, 0);
+
+ return (void *)brw_vp;
+}
+
+static void brw_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.VertexProgram = (struct brw_vertex_program *)vs;
+ brw->state.dirty.brw |= BRW_NEW_VS;
+
+ debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n");
+}
+
+static void brw_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+ FREE(shader);
+}
+
+
+static void brw_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Clip = *clip;
+}
+
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Viewport = *viewport; /* struct copy */
+ brw->state.dirty.brw |= BRW_NEW_VIEWPORT;
+
+ /* pass the viewport info to the draw module */
+ //draw_set_viewport_state(brw->draw, viewport);
+}
+
+
+static void brw_set_vertex_buffer( struct pipe_context *pipe,
+ unsigned index,
+ const struct pipe_vertex_buffer *buffer )
+{
+ struct brw_context *brw = brw_context(pipe);
+ brw->vb.vbo_array[index] = buffer;
+}
+
+static void brw_set_vertex_element(struct pipe_context *pipe,
+ unsigned index,
+ const struct pipe_vertex_element *element)
+{
+ /* flush ? */
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(index < PIPE_ATTRIB_MAX);
+ struct brw_vertex_element_state el;
+ memset(&el, 0, sizeof(el));
+
+ el.ve0.src_offset = element->src_offset;
+ el.ve0.src_format = brw_translate_surface_format(element->src_format);
+ el.ve0.valid = 1;
+ el.ve0.vertex_buffer_index = element->vertex_buffer_index;
+
+ el.ve1.dst_offset = index * 4;
+
+ el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
+
+ switch (element->nr_components) {
+ case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
+ case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
+ case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ brw->vb.inputs[index] = el;
+}
+
+
+
+/************************************************************************
+ * Constant buffers
+ */
+
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(buf == 0 || index == 0);
+
+ brw->attribs.Constants[shader] = buf;
+ brw->state.dirty.brw |= BRW_NEW_CONSTANTS;
+}
+
+
+/************************************************************************
+ * Texture surfaces
+ */
+
+
+static void brw_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct brw_context *brw = brw_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == brw->num_textures &&
+ !memcmp(brw->attribs.Texture, texture, num *
+ sizeof(struct pipe_texture *)))
+ return;
+
+ for (i = 0; i < num; i++)
+ pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i],
+ texture[i]);
+
+ for (i = num; i < brw->num_textures; i++)
+ pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i],
+ NULL);
+
+ brw->num_textures = num;
+
+ brw->state.dirty.brw |= BRW_NEW_TEXTURE;
+}
+
+
+/************************************************************************
+ * Render targets, etc
+ */
+
+static void brw_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FrameBuffer = *fb; /* struct copy */
+
+ brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER;
+}
+
+
+
+/************************************************************************
+ * Rasterizer state
+ */
+
+static void *
+brw_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rasterizer)
+{
+ DUP(pipe_rasterizer_state, rasterizer);
+}
+
+static void brw_bind_rasterizer_state( struct pipe_context *pipe,
+ void *setup )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Raster = (struct pipe_rasterizer_state *)setup;
+
+ /* Also pass-through to draw module:
+ */
+ //draw_set_rasterizer_state(brw->draw, setup);
+
+ brw->state.dirty.brw |= BRW_NEW_RASTERIZER;
+}
+
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+ void *setup)
+{
+ free(setup);
+}
+
+
+
+void
+brw_init_state_functions( struct brw_context *brw )
+{
+ brw->pipe.create_blend_state = brw_create_blend_state;
+ brw->pipe.bind_blend_state = brw_bind_blend_state;
+ brw->pipe.delete_blend_state = brw_delete_blend_state;
+
+ brw->pipe.create_sampler_state = brw_create_sampler_state;
+ brw->pipe.bind_sampler_states = brw_bind_sampler_states;
+ brw->pipe.delete_sampler_state = brw_delete_sampler_state;
+
+ brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+ brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+ brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+
+ brw->pipe.create_rasterizer_state = brw_create_rasterizer_state;
+ brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state;
+ brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state;
+ brw->pipe.create_fs_state = brw_create_fs_state;
+ brw->pipe.bind_fs_state = brw_bind_fs_state;
+ brw->pipe.delete_fs_state = brw_delete_fs_state;
+ brw->pipe.create_vs_state = brw_create_vs_state;
+ brw->pipe.bind_vs_state = brw_bind_vs_state;
+ brw->pipe.delete_vs_state = brw_delete_vs_state;
+
+ brw->pipe.set_blend_color = brw_set_blend_color;
+ brw->pipe.set_clip_state = brw_set_clip_state;
+ brw->pipe.set_constant_buffer = brw_set_constant_buffer;
+ brw->pipe.set_framebuffer_state = brw_set_framebuffer_state;
+
+// brw->pipe.set_feedback_state = brw_set_feedback_state;
+// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer;
+
+ brw->pipe.set_polygon_stipple = brw_set_polygon_stipple;
+ brw->pipe.set_scissor_state = brw_set_scissor_state;
+ brw->pipe.set_sampler_textures = brw_set_sampler_textures;
+ brw->pipe.set_viewport_state = brw_set_viewport_state;
+ brw->pipe.set_vertex_buffer = brw_set_vertex_buffer;
+ brw->pipe.set_vertex_element = brw_set_vertex_element;
+}
diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 44f946ea74..5c90583824 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -124,10 +124,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, */ component = get_ext_swz(src->SrcRegisterExtSwz, component); - /* Can't handle this, don't know if we need to: - */ - assert(src->SrcRegisterExtSwz.ExtDivide == TGSI_EXTSWIZZLE_ONE); - /* Not handling indirect lookups yet: */ assert(src->SrcRegister.Indirect == 0); diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index de42ffc5b1..b9eaee56ee 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -136,6 +136,9 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp case PIPE_TEX_FILTER_LINEAR: sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + break; default: break; } @@ -155,26 +158,23 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp } /* Set Anisotropy: */ - if (pipe_sampler->max_anisotropy > 1.0) { - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - - if (pipe_sampler->max_anisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, - BRW_ANISORATIO_16); - } + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; } - else { - switch (pipe_sampler->mag_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); } sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); @@ -235,7 +235,8 @@ static void upload_wm_samplers(struct brw_context *brw) unsigned sampler_count = 0; /* BRW_NEW_SAMPLER */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ if (brw->attribs.Texture[unit]) { const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index d16d919bce..853c743ccf 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -237,7 +237,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { const struct brw_texture *texUnit = brw->attribs.Texture[i]; if (texUnit && diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fa16ed94e8..316ae552b8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -1,243 +1,243 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "sp_clear.h" -#include "sp_context.h" -#include "sp_flush.h" -#include "sp_prim_setup.h" -#include "sp_prim_vbuf.h" -#include "sp_state.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_texture.h" -#include "sp_winsys.h" -#include "sp_query.h" - - - -/** - * Map any drawing surfaces which aren't already mapped - */ -void -softpipe_map_surfaces(struct softpipe_context *sp) -{ - unsigned i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); - } - - sp_tile_cache_map_surfaces(sp->zsbuf_cache); -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -softpipe_unmap_surfaces(struct softpipe_context *sp) -{ - uint i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); - } - sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); -} - - -static void softpipe_destroy( struct pipe_context *pipe ) -{ - struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; - uint i; - - draw_destroy( softpipe->draw ); - - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - sp_destroy_tile_cache(softpipe->cbuf_cache[i]); - sp_destroy_tile_cache(softpipe->zsbuf_cache); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); - - for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); - } - } - - FREE( softpipe ); -} - - -struct pipe_context * -softpipe_create( struct pipe_screen *screen, - struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) -{ - struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); - uint i; - -#if defined(__i386__) || defined(__386__) - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - softpipe->use_sse = FALSE; -#endif - - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; - - softpipe->pipe.winsys = pipe_winsys; - softpipe->pipe.screen = screen; - softpipe->pipe.destroy = softpipe_destroy; - - /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_state = softpipe_bind_sampler_state; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; - softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; - - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - - softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); - - /* - * Alloc caches for accessing drawing surfaces and textures. - * Must be before quad stage setup! - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); - - - /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); - - softpipe->winsys = softpipe_winsys; - - /* - * Create drawing context and plug our rendering stage into it. - */ - softpipe->draw = draw_create(); - assert(softpipe->draw); - softpipe->setup = sp_draw_render_stage(softpipe); - - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); - } - else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - - /* plug in AA line/point stages */ - draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); - draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); - -#if USE_DRAW_STAGE_PSTIPPLE - /* Do polygon stipple w/ texture map + frag prog? */ - draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif - - sp_init_surface_functions(softpipe); - - return &softpipe->pipe; -} +/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "sp_clear.h"
+#include "sp_context.h"
+#include "sp_flush.h"
+#include "sp_prim_setup.h"
+#include "sp_prim_vbuf.h"
+#include "sp_state.h"
+#include "sp_surface.h"
+#include "sp_tile_cache.h"
+#include "sp_texture.h"
+#include "sp_winsys.h"
+#include "sp_query.h"
+
+
+
+/**
+ * Map any drawing surfaces which aren't already mapped
+ */
+void
+softpipe_map_surfaces(struct softpipe_context *sp)
+{
+ unsigned i;
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
+ sp_tile_cache_map_surfaces(sp->cbuf_cache[i]);
+ }
+
+ sp_tile_cache_map_surfaces(sp->zsbuf_cache);
+}
+
+
+/**
+ * Unmap any mapped drawing surfaces
+ */
+void
+softpipe_unmap_surfaces(struct softpipe_context *sp)
+{
+ uint i;
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++)
+ sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
+ sp_flush_tile_cache(sp, sp->zsbuf_cache);
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
+ sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]);
+ }
+ sp_tile_cache_unmap_surfaces(sp->zsbuf_cache);
+}
+
+
+static void softpipe_destroy( struct pipe_context *pipe )
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct pipe_winsys *ws = pipe->winsys;
+ uint i;
+
+ draw_destroy( softpipe->draw );
+
+ softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple );
+ softpipe->quad.earlyz->destroy( softpipe->quad.earlyz );
+ softpipe->quad.shade->destroy( softpipe->quad.shade );
+ softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test );
+ softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+ softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test );
+ softpipe->quad.occlusion->destroy( softpipe->quad.occlusion );
+ softpipe->quad.coverage->destroy( softpipe->quad.coverage );
+ softpipe->quad.bufloop->destroy( softpipe->quad.bufloop );
+ softpipe->quad.blend->destroy( softpipe->quad.blend );
+ softpipe->quad.colormask->destroy( softpipe->quad.colormask );
+ softpipe->quad.output->destroy( softpipe->quad.output );
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
+ sp_destroy_tile_cache(softpipe->zsbuf_cache);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ sp_destroy_tile_cache(softpipe->tex_cache[i]);
+
+ for (i = 0; i < Elements(softpipe->constants); i++) {
+ if (softpipe->constants[i].buffer) {
+ pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL);
+ }
+ }
+
+ FREE( softpipe );
+}
+
+
+struct pipe_context *
+softpipe_create( struct pipe_screen *screen,
+ struct pipe_winsys *pipe_winsys,
+ struct softpipe_winsys *softpipe_winsys )
+{
+ struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context);
+ uint i;
+
+#if defined(__i386__) || defined(__386__)
+ softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL;
+#else
+ softpipe->use_sse = FALSE;
+#endif
+
+ softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL;
+
+ softpipe->pipe.winsys = pipe_winsys;
+ softpipe->pipe.screen = screen;
+ softpipe->pipe.destroy = softpipe_destroy;
+
+ /* state setters */
+ softpipe->pipe.create_blend_state = softpipe_create_blend_state;
+ softpipe->pipe.bind_blend_state = softpipe_bind_blend_state;
+ softpipe->pipe.delete_blend_state = softpipe_delete_blend_state;
+
+ softpipe->pipe.create_sampler_state = softpipe_create_sampler_state;
+ softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states;
+ softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state;
+
+ softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state;
+ softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state;
+ softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state;
+
+ softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state;
+ softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state;
+ softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state;
+
+ softpipe->pipe.create_fs_state = softpipe_create_fs_state;
+ softpipe->pipe.bind_fs_state = softpipe_bind_fs_state;
+ softpipe->pipe.delete_fs_state = softpipe_delete_fs_state;
+
+ softpipe->pipe.create_vs_state = softpipe_create_vs_state;
+ softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
+ softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
+
+ softpipe->pipe.set_blend_color = softpipe_set_blend_color;
+ softpipe->pipe.set_clip_state = softpipe_set_clip_state;
+ softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
+ softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
+ softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple;
+ softpipe->pipe.set_scissor_state = softpipe_set_scissor_state;
+ softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures;
+ softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
+
+ softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer;
+ softpipe->pipe.set_vertex_element = softpipe_set_vertex_element;
+
+ softpipe->pipe.draw_arrays = softpipe_draw_arrays;
+ softpipe->pipe.draw_elements = softpipe_draw_elements;
+
+ softpipe->pipe.clear = softpipe_clear;
+ softpipe->pipe.flush = softpipe_flush;
+
+ softpipe_init_query_funcs( softpipe );
+ softpipe_init_texture_funcs( softpipe );
+
+ /*
+ * Alloc caches for accessing drawing surfaces and textures.
+ * Must be before quad stage setup!
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ softpipe->cbuf_cache[i] = sp_create_tile_cache();
+ softpipe->zsbuf_cache = sp_create_tile_cache();
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ softpipe->tex_cache[i] = sp_create_tile_cache();
+
+
+ /* setup quad rendering stages */
+ softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe);
+ softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe);
+ softpipe->quad.shade = sp_quad_shade_stage(softpipe);
+ softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe);
+ softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
+ softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe);
+ softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe);
+ softpipe->quad.coverage = sp_quad_coverage_stage(softpipe);
+ softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe);
+ softpipe->quad.blend = sp_quad_blend_stage(softpipe);
+ softpipe->quad.colormask = sp_quad_colormask_stage(softpipe);
+ softpipe->quad.output = sp_quad_output_stage(softpipe);
+
+ softpipe->winsys = softpipe_winsys;
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ softpipe->draw = draw_create();
+ assert(softpipe->draw);
+ softpipe->setup = sp_draw_render_stage(softpipe);
+
+ if (GETENV( "SP_VBUF" ) != NULL) {
+ sp_init_vbuf(softpipe);
+ }
+ else {
+ draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
+ }
+
+ /* plug in AA line/point stages */
+ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
+ draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
+
+#if USE_DRAW_STAGE_PSTIPPLE
+ /* Do polygon stipple w/ texture map + frag prog? */
+ draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe);
+#endif
+
+ sp_init_surface_functions(softpipe);
+
+ return &softpipe->pipe;
+}
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index feeafc7084..19e6cfaf02 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -81,6 +81,9 @@ struct softpipe_context { struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; unsigned dirty; + unsigned num_samplers; + unsigned num_textures; + /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 4ffeac35e1..318916fe30 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -72,7 +72,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) passMask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= passMask; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 17f3ecd0b8..0b79cfa1ed 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -392,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); /* to do */ break; default: - abort(); + assert(0); } /* @@ -464,7 +464,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) } break; default: - abort(); + assert(0); } @@ -716,7 +716,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ break; default: - abort(); + assert(0); } /* pass blended quad to next stage */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a9a0754f27..a1859f9883 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -185,7 +185,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) zmask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= zmask; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1fbb2e38c4..9198198db3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -1,208 +1,209 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_headers.h" -#include "sp_quad.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" - - -struct quad_shade_stage -{ - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; - struct tgsi_exec_machine machine; - struct tgsi_exec_vector *inputs, *outputs; - int colorOutSlot, depthOutSlot; -}; - - -/** cast wrapper */ -static INLINE struct quad_shade_stage * -quad_shade_stage(struct quad_stage *qs) -{ - return (struct quad_shade_stage *) qs; -} - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quad( - struct quad_stage *qs, - struct quad_header *quad ) -{ - struct quad_shade_stage *qss = quad_shade_stage( qs ); - struct softpipe_context *softpipe = qs->softpipe; - struct tgsi_exec_machine *machine = &qss->machine; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; - - /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, - &qss->machine, - quad ); - - /* store result color */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color, - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); - } - - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; - /* XXX not sure the above line is always correct. The following - * might be better: - quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; - */ - } - } - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - -/** - * Per-primitive (or per-begin?) setup - */ -static void shade_begin(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - - /* set TGSI sampler state that varies */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } - - /* find output slots for depth, color */ - qss->colorOutSlot = -1; - qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { - switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - qss->depthOutSlot = i; - break; - case TGSI_SEMANTIC_COLOR: - qss->colorOutSlot = i; - break; - } - } - - softpipe->fs->prepare( softpipe->fs, - &qss->machine, - qss->samplers ); - - qs->next->begin(qs->next); -} - - -static void shade_destroy(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; - - tgsi_exec_machine_free_data(&qss->machine); - FREE( qss->inputs ); - FREE( qss->outputs ); - FREE( qs ); -} - - -struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) -{ - struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; - - /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); - qss->machine.Inputs = align16(qss->inputs); - qss->machine.Outputs = align16(qss->outputs); - - qss->stage.softpipe = softpipe; - qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; - qss->stage.destroy = shade_destroy; - - /* set TGSI sampler state that's constant */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; - qss->samplers[i].cache = softpipe->tex_cache[i]; - } - - tgsi_exec_machine_init( &qss->machine ); - - return &qss->stage; -} +/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Vertices are just an array of floats, with all the attributes
+ * packed. We currently assume a layout like:
+ *
+ * attr[0][0..3] - window position
+ * attr[1..n][0..3] - remaining attributes.
+ *
+ * Attributes are assumed to be 4 floats wide but are packed so that
+ * all the enabled attributes run contiguously.
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+
+
+struct quad_shade_stage
+{
+ struct quad_stage stage;
+ struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct tgsi_exec_machine machine;
+ struct tgsi_exec_vector *inputs, *outputs;
+ int colorOutSlot, depthOutSlot;
+};
+
+
+/** cast wrapper */
+static INLINE struct quad_shade_stage *
+quad_shade_stage(struct quad_stage *qs)
+{
+ return (struct quad_shade_stage *) qs;
+}
+
+
+
+/**
+ * Execute fragment shader for the four fragments in the quad.
+ */
+static void
+shade_quad(
+ struct quad_stage *qs,
+ struct quad_header *quad )
+{
+ struct quad_shade_stage *qss = quad_shade_stage( qs );
+ struct softpipe_context *softpipe = qs->softpipe;
+ struct tgsi_exec_machine *machine = &qss->machine;
+
+ /* Consts do not require 16 byte alignment. */
+ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+
+ machine->InterpCoefs = quad->coef;
+
+ /* run shader */
+ quad->mask &= softpipe->fs->run( softpipe->fs,
+ &qss->machine,
+ quad );
+
+ /* store result color */
+ if (qss->colorOutSlot >= 0) {
+ /* XXX need to handle multiple color outputs someday */
+ assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
+ == TGSI_SEMANTIC_COLOR);
+ memcpy(
+ quad->outputs.color,
+ &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
+ sizeof( quad->outputs.color ) );
+ }
+
+ /*
+ * XXX the following code for updating quad->outputs.depth
+ * isn't really needed if we did early z testing.
+ */
+
+ /* store result Z */
+ if (qss->depthOutSlot >= 0) {
+ /* output[slot] is new Z */
+ uint i;
+ for (i = 0; i < 4; i++) {
+ quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i];
+ }
+ }
+ else {
+ /* copy input Z (which was interpolated by the executor) to output Z */
+ uint i;
+ for (i = 0; i < 4; i++) {
+ quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
+ /* XXX not sure the above line is always correct. The following
+ * might be better:
+ quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i];
+ */
+ }
+ }
+
+ /* shader may cull fragments */
+ if( quad->mask ) {
+ qs->next->run( qs->next, quad );
+ }
+}
+
+/**
+ * Per-primitive (or per-begin?) setup
+ */
+static void shade_begin(struct quad_stage *qs)
+{
+ struct quad_shade_stage *qss = quad_shade_stage(qs);
+ struct softpipe_context *softpipe = qs->softpipe;
+ unsigned i;
+ unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
+
+ /* set TGSI sampler state that varies */
+ for (i = 0; i < num; i++) {
+ qss->samplers[i].state = softpipe->sampler[i];
+ qss->samplers[i].texture = softpipe->texture[i];
+ }
+
+ /* find output slots for depth, color */
+ qss->colorOutSlot = -1;
+ qss->depthOutSlot = -1;
+ for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) {
+ switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ qss->depthOutSlot = i;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ qss->colorOutSlot = i;
+ break;
+ }
+ }
+
+ softpipe->fs->prepare( softpipe->fs,
+ &qss->machine,
+ qss->samplers );
+
+ qs->next->begin(qs->next);
+}
+
+
+static void shade_destroy(struct quad_stage *qs)
+{
+ struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
+
+ tgsi_exec_machine_free_data(&qss->machine);
+ FREE( qss->inputs );
+ FREE( qss->outputs );
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
+{
+ struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
+ uint i;
+
+ /* allocate storage for program inputs/outputs, aligned to 16 bytes */
+ qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16);
+ qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16);
+ qss->machine.Inputs = align16(qss->inputs);
+ qss->machine.Outputs = align16(qss->outputs);
+
+ qss->stage.softpipe = softpipe;
+ qss->stage.begin = shade_begin;
+ qss->stage.run = shade_quad;
+ qss->stage.destroy = shade_destroy;
+
+ /* set TGSI sampler state that's constant */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ assert(softpipe->tex_cache[i]);
+ qss->samplers[i].get_samples = sp_get_samples;
+ qss->samplers[i].pipe = &softpipe->pipe;
+ qss->samplers[i].cache = softpipe->tex_cache[i];
+ }
+
+ tgsi_exec_machine_init( &qss->machine );
+
+ return &qss->stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 3943d4ed2b..45d159143f 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -1,195 +1,195 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef SP_STATE_H -#define SP_STATE_H - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - - -#define SP_NEW_VIEWPORT 0x1 -#define SP_NEW_RASTERIZER 0x2 -#define SP_NEW_FS 0x4 -#define SP_NEW_BLEND 0x8 -#define SP_NEW_CLIP 0x10 -#define SP_NEW_SCISSOR 0x20 -#define SP_NEW_STIPPLE 0x40 -#define SP_NEW_FRAMEBUFFER 0x80 -#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 -#define SP_NEW_CONSTANTS 0x200 -#define SP_NEW_SAMPLER 0x400 -#define SP_NEW_TEXTURE 0x800 -#define SP_NEW_VERTEX 0x1000 -#define SP_NEW_VS 0x2000 -#define SP_NEW_QUERY 0x4000 - - -struct tgsi_sampler; -struct tgsi_exec_machine; - - -/** Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; - - struct tgsi_shader_info info; - - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); - - - void (*delete)( struct sp_fragment_shader * ); -}; - -struct vertex_info; - -/** Subclass of pipe_shader_state */ -struct sp_vertex_shader { - struct pipe_shader_state shader; - struct draw_vertex_shader *draw_data; -}; - - - -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_state(struct pipe_context *, unsigned, void *); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_texture( struct pipe_context *, - unsigned unit, - struct pipe_texture * ); - -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); - -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); - - -void softpipe_update_derived( struct softpipe_context *softpipe ); - - -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - - -void -softpipe_map_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_surfaces(struct softpipe_context *sp); - -void -softpipe_map_texture_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_texture_surfaces(struct softpipe_context *sp); - - -struct vertex_info * -softpipe_get_vertex_info(struct softpipe_context *softpipe); - -struct vertex_info * -softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); - - -#endif +/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_STATE_H
+#define SP_STATE_H
+
+#include "pipe/p_state.h"
+#include "tgsi/util/tgsi_scan.h"
+
+
+#define SP_NEW_VIEWPORT 0x1
+#define SP_NEW_RASTERIZER 0x2
+#define SP_NEW_FS 0x4
+#define SP_NEW_BLEND 0x8
+#define SP_NEW_CLIP 0x10
+#define SP_NEW_SCISSOR 0x20
+#define SP_NEW_STIPPLE 0x40
+#define SP_NEW_FRAMEBUFFER 0x80
+#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100
+#define SP_NEW_CONSTANTS 0x200
+#define SP_NEW_SAMPLER 0x400
+#define SP_NEW_TEXTURE 0x800
+#define SP_NEW_VERTEX 0x1000
+#define SP_NEW_VS 0x2000
+#define SP_NEW_QUERY 0x4000
+
+
+struct tgsi_sampler;
+struct tgsi_exec_machine;
+
+
+/** Subclass of pipe_shader_state (though it doesn't really need to be).
+ *
+ * This is starting to look an awful lot like a quad pipeline stage...
+ */
+struct sp_fragment_shader {
+ struct pipe_shader_state shader;
+
+ struct tgsi_shader_info info;
+
+ void (*prepare)( const struct sp_fragment_shader *shader,
+ struct tgsi_exec_machine *machine,
+ struct tgsi_sampler *samplers);
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ unsigned (*run)( const struct sp_fragment_shader *shader,
+ struct tgsi_exec_machine *machine,
+ struct quad_header *quad );
+
+
+ void (*delete)( struct sp_fragment_shader * );
+};
+
+struct vertex_info;
+
+/** Subclass of pipe_shader_state */
+struct sp_vertex_shader {
+ struct pipe_shader_state shader;
+ struct draw_vertex_shader *draw_data;
+};
+
+
+
+void *
+softpipe_create_blend_state(struct pipe_context *,
+ const struct pipe_blend_state *);
+void softpipe_bind_blend_state(struct pipe_context *,
+ void *);
+void softpipe_delete_blend_state(struct pipe_context *,
+ void *);
+
+void *
+softpipe_create_sampler_state(struct pipe_context *,
+ const struct pipe_sampler_state *);
+void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void softpipe_delete_sampler_state(struct pipe_context *, void *);
+
+void *
+softpipe_create_depth_stencil_state(struct pipe_context *,
+ const struct pipe_depth_stencil_alpha_state *);
+void softpipe_bind_depth_stencil_state(struct pipe_context *, void *);
+void softpipe_delete_depth_stencil_state(struct pipe_context *, void *);
+
+void *
+softpipe_create_rasterizer_state(struct pipe_context *,
+ const struct pipe_rasterizer_state *);
+void softpipe_bind_rasterizer_state(struct pipe_context *, void *);
+void softpipe_delete_rasterizer_state(struct pipe_context *, void *);
+
+void softpipe_set_framebuffer_state( struct pipe_context *,
+ const struct pipe_framebuffer_state * );
+
+void softpipe_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color );
+
+void softpipe_set_clip_state( struct pipe_context *,
+ const struct pipe_clip_state * );
+
+void softpipe_set_constant_buffer(struct pipe_context *,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf);
+
+void *softpipe_create_fs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_fs_state(struct pipe_context *, void *);
+void softpipe_delete_fs_state(struct pipe_context *, void *);
+void *softpipe_create_vs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_vs_state(struct pipe_context *, void *);
+void softpipe_delete_vs_state(struct pipe_context *, void *);
+
+void softpipe_set_polygon_stipple( struct pipe_context *,
+ const struct pipe_poly_stipple * );
+
+void softpipe_set_scissor_state( struct pipe_context *,
+ const struct pipe_scissor_state * );
+
+void softpipe_set_sampler_textures( struct pipe_context *,
+ unsigned num,
+ struct pipe_texture ** );
+
+void softpipe_set_viewport_state( struct pipe_context *,
+ const struct pipe_viewport_state * );
+
+void softpipe_set_vertex_element(struct pipe_context *,
+ unsigned index,
+ const struct pipe_vertex_element *);
+
+void softpipe_set_vertex_buffer(struct pipe_context *,
+ unsigned index,
+ const struct pipe_vertex_buffer *);
+
+
+void softpipe_update_derived( struct softpipe_context *softpipe );
+
+
+boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count);
+
+boolean softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
+
+
+void
+softpipe_map_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_unmap_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_map_texture_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_unmap_texture_surfaces(struct softpipe_context *sp);
+
+
+struct vertex_info *
+softpipe_get_vertex_info(struct softpipe_context *softpipe);
+
+struct vertex_info *
+softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
+
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index eb641ed321..4eefd1d61f 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -120,7 +120,8 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) softpipe->vs = (const struct sp_vertex_shader *)vs; - draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data); + draw_bind_vertex_shader(softpipe->draw, + (softpipe->vs ? softpipe->vs->draw_data : NULL)); softpipe->dirty |= SP_NEW_VS; } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 1d6dd17d1d..033288a0aa 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -50,45 +50,69 @@ softpipe_create_sampler_state(struct pipe_context *pipe, return mem_dup(sampler, sizeof(*sampler)); } + void -softpipe_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +softpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_samplers && + !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + return; draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; + for (i = 0; i < num; ++i) + softpipe->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + softpipe->sampler[i] = NULL; + + softpipe->num_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } void -softpipe_delete_sampler_state(struct pipe_context *pipe, - void *sampler) +softpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { - FREE( sampler ); -} + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + assert(num <= PIPE_MAX_SAMPLERS); -void -softpipe_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); + /* Check for no-op */ + if (num == softpipe->num_textures && + !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *))) + return; draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - pipe_texture_reference(&softpipe->texture[unit], texture); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference(&softpipe->texture[i], tex); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + } - sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); + softpipe->num_textures = num; softpipe->dirty |= SP_NEW_TEXTURE; } +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 0ced585c7f..34da6356d7 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -714,6 +714,7 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; @@ -846,6 +847,7 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float texel0[4][4], texel1[4][4]; float xw, yw, zw; /* interpolation weights */ @@ -972,6 +974,7 @@ sp_get_samples_rect(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index c605ed925b..bbf2d80308 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -1,202 +1,202 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Michel Dänzer <michel@tungstengraphics.com> - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_texture.h" -#include "sp_tile_cache.h" - - -/* Simple, maximally packed layout. - */ - -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - - -static void -softpipe_texture_layout(struct softpipe_texture * spt) -{ - struct pipe_texture *pt = &spt->base; - unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - - spt->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - - spt->level_offset[level] = spt->buffer_size; - - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; - - width = minify(width); - height = minify(height); - depth = minify(depth); - } -} - - -static struct pipe_texture * -softpipe_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); - if (!spt) - return NULL; - - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; - - softpipe_texture_layout(spt); - - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - return NULL; - } - - assert(spt->base.refcount == 1); - - return &spt->base; -} - - -static void -softpipe_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) -{ - if (!*pt) - return; - - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ - if (--(*pt)->refcount <= 0) { - struct softpipe_texture *spt = softpipe_texture(*pt); - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - - FREE(spt); - } - *pt = NULL; -} - - -static struct pipe_surface * -softpipe_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = softpipe_texture(pt); - struct pipe_surface *ps; - - assert(level <= pt->last_level); - - ps = ws->surface_alloc(ws); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = ps->width; - ps->offset = spt->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { - ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - } - return ps; -} - - -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) -{ - softpipe->pipe.texture_update = softpipe_texture_update; -} - - -void -softpipe_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = softpipe_texture_create_screen; - screen->texture_release = softpipe_texture_release_screen; - screen->get_tex_surface = softpipe_get_tex_surface_screen; -} +/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Michel Dänzer <michel@tungstengraphics.com>
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tile_cache.h"
+
+
+/* Simple, maximally packed layout.
+ */
+
+static unsigned minify( unsigned d )
+{
+ return MAX2(1, d>>1);
+}
+
+
+static void
+softpipe_texture_layout(struct softpipe_texture * spt)
+{
+ struct pipe_texture *pt = &spt->base;
+ unsigned level;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+
+ spt->buffer_size = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ pt->width[level] = width;
+ pt->height[level] = height;
+ pt->depth[level] = depth;
+
+ spt->level_offset[level] = spt->buffer_size;
+
+ spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) *
+ ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
+ width * pt->cpp;
+
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ }
+}
+
+
+static struct pipe_texture *
+softpipe_texture_create_screen(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture);
+ if (!spt)
+ return NULL;
+
+ spt->base = *templat;
+ spt->base.refcount = 1;
+ spt->base.screen = screen;
+
+ softpipe_texture_layout(spt);
+
+ spt->buffer = ws->buffer_create(ws, 32,
+ PIPE_BUFFER_USAGE_PIXEL,
+ spt->buffer_size);
+ if (!spt->buffer) {
+ FREE(spt);
+ return NULL;
+ }
+
+ assert(spt->base.refcount == 1);
+
+ return &spt->base;
+}
+
+
+static void
+softpipe_texture_release_screen(struct pipe_screen *screen,
+ struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ /*
+ DBG("%s %p refcount will be %d\n",
+ __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+ */
+ if (--(*pt)->refcount <= 0) {
+ struct softpipe_texture *spt = softpipe_texture(*pt);
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) spt);
+ */
+
+ pipe_buffer_reference(screen->winsys, &spt->buffer, NULL);
+
+ FREE(spt);
+ }
+ *pt = NULL;
+}
+
+
+static struct pipe_surface *
+softpipe_get_tex_surface_screen(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct softpipe_texture *spt = softpipe_texture(pt);
+ struct pipe_surface *ps;
+
+ assert(level <= pt->last_level);
+
+ ps = ws->surface_alloc(ws);
+ if (ps) {
+ assert(ps->refcount);
+ assert(ps->winsys);
+ pipe_buffer_reference(ws, &ps->buffer, spt->buffer);
+ ps->format = pt->format;
+ ps->cpp = pt->cpp;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->pitch = ps->width;
+ ps->offset = spt->level_offset[level];
+
+ if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
+ ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
+ (pt->compressed ? ps->height/4 : ps->height) *
+ ps->width * ps->cpp;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+ }
+ return ps;
+}
+
+
+static void
+softpipe_texture_update(struct pipe_context *pipe,
+ struct pipe_texture *texture)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ uint unit;
+ for (unit = 0; unit < softpipe->num_textures; unit++) {
+ if (softpipe->texture[unit] == texture) {
+ sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]);
+ }
+ }
+}
+
+
+void
+softpipe_init_texture_funcs( struct softpipe_context *softpipe )
+{
+ softpipe->pipe.texture_update = softpipe_texture_update;
+}
+
+
+void
+softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
+{
+ screen->texture_create = softpipe_texture_create_screen;
+ screen->texture_release = softpipe_texture_release_screen;
+ screen->get_tex_surface = softpipe_get_tex_surface_screen;
+}
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 1501b52f3e..b64948f0f3 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -100,7 +100,7 @@ struct pipe_context { void * (*create_sampler_state)(struct pipe_context *, const struct pipe_sampler_state *); - void (*bind_sampler_state)(struct pipe_context *, unsigned unit, void *); + void (*bind_sampler_states)(struct pipe_context *, unsigned num, void **); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -148,9 +148,9 @@ struct pipe_context { /* Currently a sampler is constrained to sample from a single texture: */ - void (*set_sampler_texture)( struct pipe_context *, - unsigned sampler, - struct pipe_texture * ); + void (*set_sampler_textures)( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); void (*set_viewport_state)( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 2a11627b36..f45363f355 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -41,6 +41,8 @@ #include <stdarg.h> +#include "p_compiler.h" + #ifdef __cplusplus extern "C" { @@ -58,8 +60,22 @@ extern "C" { #endif +/** + * Print debug messages. + * + * A debug message will be printed regardless of the DEBUG/NDEBUG macros. + * + * The actual channel used to output debug message is platform specific. To + * avoid misformating or truncation, follow these rules of thumb: + * - output whole lines + * - avoid outputing large strings (512 bytes is the current maximum length + * that is guaranteed to be printed in all platforms) + */ void debug_printf(const char *format, ...); +/** + * @sa debug_printf + */ void debug_vprintf(const char *format, va_list ap); void debug_assert_fail(const char *expr, const char *file, unsigned line); @@ -79,6 +95,68 @@ void debug_assert_fail(const char *expr, const char *file, unsigned line); #define assert(expr) debug_assert(expr) +/** + * Set a channel's debug mask. + * + * uuid is just a random 32 bit integer that uniquely identifies the debugging + * channel. + * + * @note Due to current implementation issues, make sure the lower 8 bits of + * UUID are unique. + */ +void debug_mask_set(uint32_t uuid, uint32_t mask); + + +uint32_t debug_mask_get(uint32_t uuid); + + +/** + * Conditional debug output. + * + * This is just a generalization of the debug filtering mechanism used + * throughout Gallium. + * + * You use this function as: + * + * @code + * #define MYDRIVER_UUID 0x12345678 // random 32 bit identifier + * + * static inline mydriver_debug(uint32_t what, const char *format, ...) + * { + * #ifdef DEBUG + * va_list ap; + * va_start(ap, format); + * debug_mask_vprintf(MYDRIVER_UUID, what, format, ap); + * va_end(ap); + * #endif + * } + * + * ... + * + * debug_mask_set(MYDRIVER_UUID, + * MYDRIVER_DEBUG_THIS | + * MYDRIVER_DEBUG_THAT | + * ... ); + * + * ... + * + * mydriver_debug(MYDRIVER_DEBUG_THIS, + * "this and this happened\n"); + * + * mydriver_debug(MYDRIVER_DEBUG_THAT, + * "that = %f\n", that); + * ... + * @endcode + * + * You can also define several variants of mydriver_debug, with hardcoded what. + * Note that although macros with variable number of arguments would accomplish + * more in less code, they are not portable. + */ +void debug_mask_vprintf(uint32_t uuid, + uint32_t what, + const char *format, + va_list ap); + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0c662d6517..bc938ba253 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -152,7 +152,7 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -/* #define PIPE_TEX_FILTER_ANISO 2 */ +#define PIPE_TEX_FILTER_ANISO 2 #define PIPE_TEX_COMPARE_NONE 0 diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 0a6145a6bf..210169f54f 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -42,15 +42,17 @@ struct tgsi_token unsigned Extended : 1; /* BOOL */ }; -#define TGSI_FILE_NULL 0 -#define TGSI_FILE_CONSTANT 1 -#define TGSI_FILE_INPUT 2 -#define TGSI_FILE_OUTPUT 3 -#define TGSI_FILE_TEMPORARY 4 -#define TGSI_FILE_SAMPLER 5 -#define TGSI_FILE_ADDRESS 6 -#define TGSI_FILE_IMMEDIATE 7 -#define TGSI_FILE_COUNT 8 /**< how many TGSI_FILE_ types */ +enum tgsi_file_type { + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ +}; #define TGSI_DECLARE_RANGE 0 @@ -225,6 +227,7 @@ struct tgsi_immediate_float32 #define TGSI_OPCODE_STR 51 #define TGSI_OPCODE_TEX 52 #define TGSI_OPCODE_TXD 53 +#define TGSI_OPCODE_TXP 134 #define TGSI_OPCODE_UP2H 54 #define TGSI_OPCODE_UP2US 55 #define TGSI_OPCODE_UP4B 56 @@ -339,7 +342,7 @@ struct tgsi_immediate_float32 * ps_1_1 */ #define TGSI_OPCODE_TEXCOORD TGSI_OPCODE_NOP -#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KIL +#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KILP #define TGSI_OPCODE_TEXBEM 107 #define TGSI_OPCODE_TEXBEML 108 #define TGSI_OPCODE_TEXREG2AR 109 @@ -419,7 +422,7 @@ struct tgsi_immediate_float32 #define TGSI_OPCODE_KIL 132 /* unpredicated kill */ #define TGSI_OPCODE_END 133 /* aka HALT */ -#define TGSI_OPCODE_LAST 134 +#define TGSI_OPCODE_LAST 135 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ @@ -652,9 +655,6 @@ struct tgsi_src_register_ext * * NegateX, NegateY, NegateZ and NegateW negate individual components of the * source register. - * - * ExtDivide specifies which component is used to divide all components of the - * source register. */ struct tgsi_src_register_ext_swz @@ -668,8 +668,7 @@ struct tgsi_src_register_ext_swz unsigned NegateY : 1; /* BOOL */ unsigned NegateZ : 1; /* BOOL */ unsigned NegateW : 1; /* BOOL */ - unsigned ExtDivide : 4; /* TGSI_EXTSWIZZLE_ */ - unsigned Padding : 3; + unsigned Padding : 7; unsigned Extended : 1; /* BOOL */ }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 5fab41acbd..e338a27383 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -108,6 +108,7 @@ struct pipe_rasterizer_state unsigned line_stipple_enable:1; unsigned line_stipple_factor:8; /**< [1..256] actually */ unsigned line_stipple_pattern:16; + unsigned line_last_pixel:1; unsigned bypass_clipping:1; unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ @@ -154,7 +155,7 @@ struct pipe_clip_state struct pipe_constant_buffer { struct pipe_buffer *buffer; - unsigned size; /** in bytes */ + unsigned size; /** in bytes (XXX: redundant!) */ }; @@ -248,6 +249,7 @@ struct pipe_sampler_state unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ unsigned compare_func:3; /**< PIPE_FUNC_x */ unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ + unsigned prefilter:4; /**< Wierd sampling state exposed by some api's */ float shadow_ambient; /**< shadow test fail color/intensity */ float lod_bias; /**< LOD/lambda bias */ float min_lod, max_lod; /**< LOD clamp range, after bias */ @@ -281,8 +283,6 @@ struct pipe_surface */ struct pipe_texture { - /* Effectively the key: - */ enum pipe_texture_target target; /**< PIPE_TEXTURE_x */ enum pipe_format format; /**< PIPE_FORMAT_x */ diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 3b32ba1d24..ef36ce75f7 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -54,7 +54,12 @@ EngFreeMem( static INLINE void * MALLOC( unsigned size ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + return malloc( size ); +#else return EngAllocMem( 0, size, 'D3AG' ); +#endif } static INLINE void * @@ -71,7 +76,12 @@ static INLINE void FREE( void *ptr ) { if( ptr ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + free( ptr ); +#else EngFreeMem( ptr ); +#endif } } diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index e784c92491..1383bd0544 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -104,13 +104,36 @@ struct pipe_winsys * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This * usage argument is only an optimization hint, not a guarantee, therefore * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. */ struct pipe_buffer *(*buffer_create)( struct pipe_winsys *sws, - unsigned alignment, - unsigned usage, - unsigned size ); + unsigned alignment, + unsigned usage, + unsigned size ); - /** Create a buffer that wraps user-space data */ + /** + * Create a buffer that wraps user-space data. + * + * Effectively this schedules a delayed call to buffer_create + * followed by an upload of the data at *some point in the future*, + * or perhaps never. Basically the allocate/upload is delayed + * until the buffer is actually passed to hardware. + * + * The intention is to provide a quick way to turn regular data + * into a buffer, and secondly to avoid a copy operation if that + * data subsequently turns out to be only accessed by the CPU. + * + * Common example is OpenGL vertex buffers that are subsequently + * processed either by software TNL in the driver or by passing to + * hardware. + * + * XXX: What happens if the delayed call to buffer_create() fails? + * + * Note that ptr may be accessed at any time upto the time when the + * buffer is destroyed, so the data must not be freed before then. + */ struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *sws, void *ptr, unsigned bytes); diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript index 635a68eea2..f18d3bd2f8 100644 --- a/src/gallium/winsys/SConscript +++ b/src/gallium/winsys/SConscript @@ -5,7 +5,7 @@ if dri: 'dri/SConscript', ]) -if 'xlib' in env['drivers'] and not dri: +if 'xlib' in env['winsys'] and not dri: SConscript([ 'xlib/SConscript', ]) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index a77c3de6c7..678e4ad0e8 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -161,6 +161,7 @@ STATETRACKER_SOURCES = [ 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', 'state_tracker/st_framebuffer.c', + 'state_tracker/st_gen_mipmap.c', 'state_tracker/st_mesa_to_tgsi.c', 'state_tracker/st_program.c', 'state_tracker/st_texture.c', diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 5c96be9216..f15e404527 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1214,19 +1214,30 @@ _mesa_init_teximage_fields(GLcontext *ctx, GLenum target, img->Width = width; img->Height = height; img->Depth = depth; + img->Width2 = width - 2 * border; /* == 1 << img->WidthLog2; */ - img->Height2 = height - 2 * border; /* == 1 << img->HeightLog2; */ - img->Depth2 = depth - 2 * border; /* == 1 << img->DepthLog2; */ img->WidthLog2 = logbase2(img->Width2); - if (height == 1) /* 1-D texture */ + + if (height == 1) { /* 1-D texture */ + img->Height2 = 1; img->HeightLog2 = 0; - else + } + else { + img->Height2 = height - 2 * border; /* == 1 << img->HeightLog2; */ img->HeightLog2 = logbase2(img->Height2); - if (depth == 1) /* 2-D texture */ + } + + if (depth == 1) { /* 2-D texture */ + img->Depth2 = 1; img->DepthLog2 = 0; - else + } + else { + img->Depth2 = depth - 2 * border; /* == 1 << img->DepthLog2; */ img->DepthLog2 = logbase2(img->Depth2); + } + img->MaxLog2 = MAX2(img->WidthLog2, img->HeightLog2); + img->IsCompressed = GL_FALSE; img->CompressedSize = 0; diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index b6b3c88b14..e385b9d997 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -3910,8 +3910,11 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, program->Base.NumNativeTexIndirections = ap.Base.NumTexIndirections; program->Base.InputsRead = ap.Base.InputsRead; program->Base.OutputsWritten = ap.Base.OutputsWritten; - for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) + for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) { program->Base.TexturesUsed[i] = ap.TexturesUsed[i]; + if (ap.TexturesUsed[i]) + program->Base.SamplersUsed |= (1 << i); + } program->Base.ShadowSamplers = ap.ShadowSamplers; program->FogOption = ap.FogOption; program->UsesKill = ap.UsesKill; diff --git a/src/mesa/sources b/src/mesa/sources index f0bf7b31fb..e3d5f22849 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -184,7 +184,6 @@ STATETRACKER_SOURCES = \ state_tracker/st_cb_readpixels.c \ state_tracker/st_cb_strings.c \ state_tracker/st_cb_texture.c \ - state_tracker/st_cache.c \ state_tracker/st_context.c \ state_tracker/st_debug.c \ state_tracker/st_draw.c \ diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c index 2a9d209153..6c13fc8141 100644 --- a/src/mesa/state_tracker/st_atom_blend.c +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -155,44 +155,43 @@ translate_logicop(GLenum logicop) static void update_blend( struct st_context *st ) { - struct pipe_blend_state blend; - const struct cso_blend *cso; + struct pipe_blend_state *blend = &st->state.blend; - memset(&blend, 0, sizeof(blend)); + memset(blend, 0, sizeof(*blend)); if (st->ctx->Color.ColorLogicOpEnabled || (st->ctx->Color.BlendEnabled && st->ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) { /* logicop enabled */ - blend.logicop_enable = 1; - blend.logicop_func = translate_logicop(st->ctx->Color.LogicOp); + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); } else if (st->ctx->Color.BlendEnabled) { /* blending enabled */ - blend.blend_enable = 1; + blend->blend_enable = 1; - blend.rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); + blend->rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); if (st->ctx->Color.BlendEquationRGB == GL_MIN || st->ctx->Color.BlendEquationRGB == GL_MAX) { /* Min/max are special */ - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); - blend.rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); + blend->rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); + blend->rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); } - blend.alpha_func = translate_blend(st->ctx->Color.BlendEquationA); + blend->alpha_func = translate_blend(st->ctx->Color.BlendEquationA); if (st->ctx->Color.BlendEquationA == GL_MIN || st->ctx->Color.BlendEquationA == GL_MAX) { /* Min/max are special */ - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); - blend.alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); + blend->alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); + blend->alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); } } else { @@ -201,25 +200,18 @@ update_blend( struct st_context *st ) /* Colormask - maybe reverse these bits? */ if (st->ctx->Color.ColorMask[0]) - blend.colormask |= PIPE_MASK_R; + blend->colormask |= PIPE_MASK_R; if (st->ctx->Color.ColorMask[1]) - blend.colormask |= PIPE_MASK_G; + blend->colormask |= PIPE_MASK_G; if (st->ctx->Color.ColorMask[2]) - blend.colormask |= PIPE_MASK_B; + blend->colormask |= PIPE_MASK_B; if (st->ctx->Color.ColorMask[3]) - blend.colormask |= PIPE_MASK_A; + blend->colormask |= PIPE_MASK_A; if (st->ctx->Color.DitherFlag) - blend.dither = 1; + blend->dither = 1; - cso = st_cached_blend_state(st, &blend); - - if (st->state.blend != cso) { - /* state has changed */ - st->state.blend = cso; - /* bind new state */ - st->pipe->bind_blend_state(st->pipe, cso->data); - } + cso_set_blend(st->cso_context, blend); if (memcmp(st->ctx->Color.BlendColor, &st->state.blend_color, 4 * sizeof(GLfloat)) != 0) { /* state has changed */ diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 7aecdbfbcc..827ad3b548 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -34,10 +34,10 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -93,53 +93,47 @@ gl_stencil_op_to_pipe(GLenum func) static void update_depth_stencil_alpha(struct st_context *st) { - struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; + struct pipe_depth_stencil_alpha_state *dsa = &st->state.depth_stencil; - memset(&depth_stencil, 0, sizeof(depth_stencil)); + memset(dsa, 0, sizeof(*dsa)); - depth_stencil.depth.enabled = st->ctx->Depth.Test; - depth_stencil.depth.writemask = st->ctx->Depth.Mask; - depth_stencil.depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); + dsa->depth.enabled = st->ctx->Depth.Test; + dsa->depth.writemask = st->ctx->Depth.Mask; + dsa->depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); if (st->ctx->Query.CurrentOcclusionObject && st->ctx->Query.CurrentOcclusionObject->Active) - depth_stencil.depth.occlusion_count = 1; + dsa->depth.occlusion_count = 1; if (st->ctx->Stencil.Enabled) { - depth_stencil.stencil[0].enabled = 1; - depth_stencil.stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); - depth_stencil.stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); - depth_stencil.stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); - depth_stencil.stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); - depth_stencil.stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; - depth_stencil.stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; - depth_stencil.stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; + dsa->stencil[0].enabled = 1; + dsa->stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); + dsa->stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); + dsa->stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); + dsa->stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); + dsa->stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; + dsa->stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; + dsa->stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; if (st->ctx->Stencil.TestTwoSide) { - depth_stencil.stencil[1].enabled = 1; - depth_stencil.stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); - depth_stencil.stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); - depth_stencil.stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); - depth_stencil.stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); - depth_stencil.stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; - depth_stencil.stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; - depth_stencil.stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; + dsa->stencil[1].enabled = 1; + dsa->stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); + dsa->stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); + dsa->stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); + dsa->stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); + dsa->stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; + dsa->stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; + dsa->stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; } } if (st->ctx->Color.AlphaEnabled) { - depth_stencil.alpha.enabled = 1; - depth_stencil.alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); - depth_stencil.alpha.ref = st->ctx->Color.AlphaRef; + dsa->alpha.enabled = 1; + dsa->alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); + dsa->alpha.ref = st->ctx->Color.AlphaRef; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - if (st->state.depth_stencil != cso) { - /* state has changed */ - st->state.depth_stencil = cso; - st->pipe->bind_depth_stencil_alpha_state(st->pipe, cso->data); /* bind new state */ - } + cso_set_depth_stencil_alpha(st->cso_context, dsa); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 229839d8b2..77cef9236b 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -32,10 +32,11 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" +#include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "st_atom.h" +#include "cso_cache/cso_context.h" + static GLuint translate_fill( GLenum mode ) { @@ -72,22 +73,21 @@ static GLboolean get_offset_flag( GLuint fill_mode, static void update_raster_state( struct st_context *st ) { GLcontext *ctx = st->ctx; - struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; + struct pipe_rasterizer_state *raster = &st->state.rasterizer; const struct gl_vertex_program *vertProg = ctx->VertexProgram._Current; uint i; - memset(&raster, 0, sizeof(raster)); + memset(raster, 0, sizeof(*raster)); - raster.origin_lower_left = 1; /* Always true for OpenGL */ + raster->origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS */ { if (ctx->Polygon.FrontFace == GL_CCW) - raster.front_winding = PIPE_WINDING_CCW; + raster->front_winding = PIPE_WINDING_CCW; else - raster.front_winding = PIPE_WINDING_CW; + raster->front_winding = PIPE_WINDING_CW; /* XXX * I think the intention here is that user-created framebuffer objects @@ -96,13 +96,13 @@ static void update_raster_state( struct st_context *st ) * But this is an implementation/driver-specific artifact - remove... */ if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0) - raster.front_winding ^= PIPE_WINDING_BOTH; + raster->front_winding ^= PIPE_WINDING_BOTH; } /* _NEW_LIGHT */ if (ctx->Light.ShadeModel == GL_FLAT) - raster.flatshade = 1; + raster->flatshade = 1; /* _NEW_LIGHT | _NEW_PROGRAM * @@ -113,28 +113,28 @@ static void update_raster_state( struct st_context *st ) if (ctx->VertexProgram._Current) { if (ctx->VertexProgram._Enabled) { /* user-defined program */ - raster.light_twoside = ctx->VertexProgram.TwoSideEnabled; + raster->light_twoside = ctx->VertexProgram.TwoSideEnabled; } else { /* TNL-generated program */ - raster.light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; + raster->light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; } } else if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { - raster.light_twoside = 1; + raster->light_twoside = 1; } /* _NEW_POLYGON */ if (ctx->Polygon.CullFlag) { if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) { - raster.cull_mode = PIPE_WINDING_BOTH; + raster->cull_mode = PIPE_WINDING_BOTH; } else if (ctx->Polygon.CullFaceMode == GL_FRONT) { - raster.cull_mode = raster.front_winding; + raster->cull_mode = raster->front_winding; } else { - raster.cull_mode = raster.front_winding ^ PIPE_WINDING_BOTH; + raster->cull_mode = raster->front_winding ^ PIPE_WINDING_BOTH; } } @@ -144,23 +144,23 @@ static void update_raster_state( struct st_context *st ) GLuint fill_front = translate_fill( ctx->Polygon.FrontMode ); GLuint fill_back = translate_fill( ctx->Polygon.BackMode ); - if (raster.front_winding == PIPE_WINDING_CW) { - raster.fill_cw = fill_front; - raster.fill_ccw = fill_back; + if (raster->front_winding == PIPE_WINDING_CW) { + raster->fill_cw = fill_front; + raster->fill_ccw = fill_back; } else { - raster.fill_cw = fill_back; - raster.fill_ccw = fill_front; + raster->fill_cw = fill_back; + raster->fill_ccw = fill_front; } /* Simplify when culling is active: */ - if (raster.cull_mode & PIPE_WINDING_CW) { - raster.fill_cw = raster.fill_ccw; + if (raster->cull_mode & PIPE_WINDING_CW) { + raster->fill_cw = raster->fill_ccw; } - if (raster.cull_mode & PIPE_WINDING_CCW) { - raster.fill_ccw = raster.fill_cw; + if (raster->cull_mode & PIPE_WINDING_CCW) { + raster->fill_ccw = raster->fill_cw; } } @@ -168,95 +168,91 @@ static void update_raster_state( struct st_context *st ) */ if (ctx->Polygon.OffsetUnits != 0.0 || ctx->Polygon.OffsetFactor != 0.0) { - raster.offset_cw = get_offset_flag( raster.fill_cw, &ctx->Polygon ); - raster.offset_ccw = get_offset_flag( raster.fill_ccw, &ctx->Polygon ); - raster.offset_units = ctx->Polygon.OffsetUnits; - raster.offset_scale = ctx->Polygon.OffsetFactor; + raster->offset_cw = get_offset_flag( raster->fill_cw, &ctx->Polygon ); + raster->offset_ccw = get_offset_flag( raster->fill_ccw, &ctx->Polygon ); + raster->offset_units = ctx->Polygon.OffsetUnits; + raster->offset_scale = ctx->Polygon.OffsetFactor; } if (ctx->Polygon.SmoothFlag) - raster.poly_smooth = 1; + raster->poly_smooth = 1; if (ctx->Polygon.StippleFlag) - raster.poly_stipple_enable = 1; + raster->poly_stipple_enable = 1; /* _NEW_BUFFERS, _NEW_POLYGON */ - if (raster.fill_cw != PIPE_POLYGON_MODE_FILL || - raster.fill_ccw != PIPE_POLYGON_MODE_FILL) + if (raster->fill_cw != PIPE_POLYGON_MODE_FILL || + raster->fill_ccw != PIPE_POLYGON_MODE_FILL) { GLfloat mrd = (ctx->DrawBuffer ? ctx->DrawBuffer->_MRD : 1.0); - raster.offset_units = ctx->Polygon.OffsetFactor * mrd; - raster.offset_scale = (ctx->Polygon.OffsetUnits * mrd * + raster->offset_units = ctx->Polygon.OffsetFactor * mrd; + raster->offset_scale = (ctx->Polygon.OffsetUnits * mrd * st->polygon_offset_scale); } /* _NEW_POINT */ - raster.point_size = ctx->Point.Size; - raster.point_smooth = ctx->Point.SmoothFlag; - raster.point_sprite = ctx->Point.PointSprite; + raster->point_size = ctx->Point.Size; + raster->point_smooth = ctx->Point.SmoothFlag; + raster->point_sprite = ctx->Point.PointSprite; for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { if (ctx->Point.CoordReplace[i]) { if (ctx->Point.SpriteOrigin == GL_UPPER_LEFT) - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; else - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; } else { - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; } } if (vertProg) { if (vertProg->Base.Id == 0) { if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { /* generated program which emits point size */ - raster.point_size_per_vertex = TRUE; + raster->point_size_per_vertex = TRUE; } } else if (ctx->VertexProgram.PointSizeEnabled) { /* user-defined program and GL_VERTEX_PROGRAM_POINT_SIZE set */ - raster.point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; + raster->point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; } } /* _NEW_LINE */ - raster.line_smooth = ctx->Line.SmoothFlag; + raster->line_smooth = ctx->Line.SmoothFlag; if (ctx->Line.SmoothFlag) { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidthAA, ctx->Const.MaxLineWidthAA); } else { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth); } - raster.line_stipple_enable = ctx->Line.StippleFlag; - raster.line_stipple_pattern = ctx->Line.StipplePattern; + raster->line_stipple_enable = ctx->Line.StippleFlag; + raster->line_stipple_pattern = ctx->Line.StipplePattern; /* GL stipple factor is in [1,256], remap to [0, 255] here */ - raster.line_stipple_factor = ctx->Line.StippleFactor - 1; + raster->line_stipple_factor = ctx->Line.StippleFactor - 1; /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) - raster.multisample = 1; + raster->multisample = 1; /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) - raster.scissor = 1; + raster->scissor = 1; - cso = st_cached_rasterizer_state(st, &raster); - if (st->state.rasterizer != cso) { - st->state.rasterizer = cso; - st->pipe->bind_rasterizer_state(st->pipe, cso->data); - } + cso_set_rasterizer(st->cso_context, raster); } const struct st_tracked_state st_update_rasterizer = { diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 92263cb688..5787a7492c 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -120,12 +120,13 @@ update_samplers(struct st_context *st) const struct st_fragment_program *fs = st->fp; GLuint su; + st->state.num_samplers = 0; + /* loop over sampler units (aka tex image units) */ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { - struct pipe_sampler_state sampler; - const struct cso_sampler *cso; + struct pipe_sampler_state *sampler = st->state.samplers + su; - memset(&sampler, 0, sizeof(sampler)); + memset(sampler, 0, sizeof(*sampler)); if (fs->Base.Base.SamplersUsed & (1 << su)) { GLuint texUnit = fs->Base.Base.SamplerUnits[su]; @@ -134,51 +135,56 @@ update_samplers(struct st_context *st) assert(texobj); - sampler.wrap_s = gl_wrap_to_sp(texobj->WrapS); - sampler.wrap_t = gl_wrap_to_sp(texobj->WrapT); - sampler.wrap_r = gl_wrap_to_sp(texobj->WrapR); + sampler->wrap_s = gl_wrap_to_sp(texobj->WrapS); + sampler->wrap_t = gl_wrap_to_sp(texobj->WrapT); + sampler->wrap_r = gl_wrap_to_sp(texobj->WrapR); - sampler.min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); - sampler.min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); - sampler.mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); + sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); + sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); + sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) - sampler.normalized_coords = 1; + sampler->normalized_coords = 1; - sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias; + sampler->lod_bias = st->ctx->Texture.Unit[su].LodBias; #if 1 - sampler.min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; - sampler.max_lod = texobj->MaxLod; + sampler->min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; + sampler->max_lod = texobj->MaxLod; #else /* min/max lod should really be as follows (untested). * Also, calculate_first_last_level() needs to be overhauled * since today's hardware had real support for LOD clamping. */ - sampler.min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); - sampler.max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); + sampler->min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); + sampler->max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); #endif - sampler.max_anisotropy = texobj->MaxAnisotropy; + sampler->max_anisotropy = texobj->MaxAnisotropy; + if (sampler->max_anisotropy > 1.0) { + sampler->min_img_filter = PIPE_TEX_FILTER_ANISO; + sampler->mag_img_filter = PIPE_TEX_FILTER_ANISO; + } /* only care about ARB_shadow, not SGI shadow */ if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) { - sampler.compare = 1; - sampler.compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; - sampler.compare_func + sampler->compare = 1; + sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; + sampler->compare_func = st_compare_func_to_pipe(texobj->CompareFunc); } - /* XXX more sampler state here */ - } + st->state.num_samplers = su + 1; - cso = st_cached_sampler_state(st, &sampler); + /* XXX more sampler state here */ - if (cso != st->state.sampler[su]) { - /* state has changed */ - st->state.sampler[su] = cso; - st->pipe->bind_sampler_state(st->pipe, su, cso->data); + cso_single_sampler(st->cso_context, su, sampler); + } + else { + cso_single_sampler(st->cso_context, su, NULL); } } + + cso_single_sampler_done(st->cso_context); } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 10c131d554..0726688493 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,10 +43,9 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_atom_shader.h" @@ -70,9 +69,6 @@ struct translated_vertex_program /** Maps VERT_RESULT_x to slot */ GLuint output_to_slot[VERT_RESULT_MAX]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - /** Pointer to the translated vertex program */ struct st_vertex_program *vp; @@ -158,7 +154,7 @@ find_translated_vp(struct st_context *st, /* * Translate fragment program if needed. */ - if (!stfp->cso) { + if (!stfp->state.tokens) { GLuint inAttr, numIn = 0; for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { @@ -175,11 +171,7 @@ find_translated_vp(struct st_context *st, assert(stfp->Base.Base.NumInstructions > 1); - (void) st_translate_fragment_program(st, stfp, - stfp->input_to_slot, - stfp->tokens, - ST_MAX_SHADER_TOKENS); - assert(stfp->cso); + st_translate_fragment_program(st, stfp, stfp->input_to_slot); } @@ -261,12 +253,8 @@ find_translated_vp(struct st_context *st, assert(stvp->Base.Base.NumInstructions > 1); - st_translate_vertex_program(st, stvp, - xvp->output_to_slot, - xvp->tokens, - ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, xvp->output_to_slot); - assert(stvp->cso); xvp->vp = stvp; /* translated VP is up to date now */ @@ -296,12 +284,10 @@ update_linkage( struct st_context *st ) xvp = find_translated_vp(st, stvp, stfp); st->vp = stvp; - st->state.vs = xvp->vp; - st->pipe->bind_vs_state(st->pipe, st->state.vs->cso->data); - st->fp = stfp; - st->state.fs = stfp->cso; - st->pipe->bind_fs_state(st->pipe, st->state.fs->data); + + st->pipe->bind_vs_state(st->pipe, stvp->driver_shader); + st->pipe->bind_fs_state(st->pipe, stfp->driver_shader); st->vertex_result_to_slot = xvp->output_to_slot; } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 697d2cdfb4..e53a897637 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -37,6 +37,7 @@ #include "st_texture.h" #include "st_cb_texture.h" #include "pipe/p_context.h" +#include "pipe/p_inlines.h" /** @@ -51,6 +52,8 @@ update_textures(struct st_context *st) struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; GLuint unit; + st->state.num_textures = 0; + for (unit = 0; unit < st->ctx->Const.MaxTextureCoordUnits; unit++) { const GLuint su = fprog->Base.SamplerUnits[unit]; struct gl_texture_object *texObj = st->ctx->Texture.Unit[su]._Current; @@ -62,6 +65,8 @@ update_textures(struct st_context *st) retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); /* XXX retval indicates whether there's a texture border */ + + st->state.num_textures = unit + 1; } /* XXX: need to ensure that textures are unbound/removed from @@ -70,18 +75,16 @@ update_textures(struct st_context *st) */ pt = st_get_stobj_texture(stObj); - - if (st->state.sampler_texture[unit] != pt) { - st->state.sampler_texture[unit] = pt; - st->pipe->set_sampler_texture(st->pipe, unit, pt); - } + pipe_texture_reference(&st->state.sampler_texture[unit], pt); if (stObj && stObj->dirtyData) { st->pipe->texture_update(st->pipe, pt); stObj->dirtyData = GL_FALSE; } - } + + st->pipe->set_sampler_textures(st->pipe, st->state.num_textures, + st->state.sampler_texture); } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 663c4f205d..f1fddc4e02 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" #include "st_cb_accum.h" #include "st_cb_fbo.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e712fd84cd..cc8a136292 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "shader/prog_instruction.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_clear.h" @@ -50,6 +49,8 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "cso_cache/cso_context.h" + @@ -157,8 +158,7 @@ make_frag_shader(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -206,9 +206,10 @@ make_vertex_shader(struct st_context *st) (1 << VERT_RESULT_HPOS)); stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); +#if 0 assert(stvp->cso); +#endif return stvp; } @@ -284,7 +285,6 @@ clear_with_quad(GLcontext *ctx, /* blend state: RGBA masking */ { struct pipe_blend_state blend; - const struct cso_blend *cso; memset(&blend, 0, sizeof(blend)); if (color) { if (ctx->Color.ColorMask[0]) @@ -298,14 +298,12 @@ clear_with_quad(GLcontext *ctx, if (st->ctx->Color.DitherFlag) blend.dither = 1; } - cso = st_cached_blend_state(st, &blend); - pipe->bind_blend_state(pipe, cso->data); + cso_set_blend(st->cso_context, &blend); } /* depth_stencil state: always pass/set to ref value */ { struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; memset(&depth_stencil, 0, sizeof(depth_stencil)); if (depth) { depth_stencil.depth.enabled = 1; @@ -323,14 +321,13 @@ clear_with_quad(GLcontext *ctx, depth_stencil.stencil[0].value_mask = 0xff; depth_stencil.stencil[0].write_mask = ctx->Stencil.WriteMask[0] & 0xff; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - pipe->bind_depth_stencil_alpha_state(pipe, cso->data); + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); } /* rasterizer state: nothing */ { struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; memset(&raster, 0, sizeof(raster)); #if 0 /* don't do per-pixel scissor; we'll just draw a PIPE_PRIM_QUAD @@ -339,8 +336,7 @@ clear_with_quad(GLcontext *ctx, if (ctx->Scissor.Enabled) raster.scissor = 1; #endif - cso = st_cached_rasterizer_state(st, &raster); - pipe->bind_rasterizer_state(pipe, cso->data); + cso_set_rasterizer(st->cso_context, &raster); } /* fragment shader state: color pass-through program */ @@ -349,7 +345,7 @@ clear_with_quad(GLcontext *ctx, if (!stfp) { stfp = make_frag_shader(st); } - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); } /* vertex shader state: color/position pass-through */ @@ -358,7 +354,7 @@ clear_with_quad(GLcontext *ctx, if (!stvp) { stvp = make_vertex_shader(st); } - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); } /* viewport state: viewport matching window dims */ @@ -380,16 +376,19 @@ clear_with_quad(GLcontext *ctx, /* draw quad matching scissor rect (XXX verify coord round-off) */ draw_quad(ctx, x0, y0, x1, y1, ctx->Depth.Clear, ctx->Color.ClearColor); +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); +#else /* Restore pipe state */ - pipe->bind_blend_state(pipe, st->state.blend->data); - pipe->bind_depth_stencil_alpha_state(pipe, st->state.depth_stencil->data); - pipe->bind_fs_state(pipe, st->state.fs->data); - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); +#endif pipe->set_viewport_state(pipe, &st->state.viewport); - /* OR: - st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); - */ } @@ -545,6 +544,15 @@ clear_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) /* simple clear of whole buffer */ GLuint clearValue = ctx->Stencil.Clear; + + switch (strb->surface->format) { + case PIPE_FORMAT_S8Z24_UNORM: + clearValue <<= 24; + break; + default: + ; /* no-op, stencil value is in least significant bits */ + } + ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); } } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 65c9fda9cb..18ec9645c4 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -41,7 +41,6 @@ #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" -#include "st_cache.h" #include "st_draw.h" #include "st_program.h" #include "st_cb_drawpixels.h" @@ -58,6 +57,7 @@ #include "pipe/p_winsys.h" #include "util/p_tile.h" #include "shader/prog_instruction.h" +#include "cso_cache/cso_context.h" /** @@ -159,8 +159,7 @@ make_bitmap_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; stfp->Base.UsesKill = GL_TRUE; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -204,8 +203,7 @@ combined_bitmap_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->bitmap.user_prog_sn = st->fp->serialNo; @@ -266,8 +264,7 @@ combined_drawpix_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo; @@ -343,8 +340,7 @@ make_fragment_shader_z(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_DEPR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -421,8 +417,7 @@ st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor) } stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); progs[passColor] = stvp; @@ -641,8 +636,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { - const GLuint unit = 0; + struct st_context *st = ctx->st; struct pipe_context *pipe = ctx->st->pipe; + struct cso_context *cso = ctx->st->cso_context; GLfloat x0, y0, x1, y1; GLuint maxSize; @@ -657,24 +653,23 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* setup state: just scissor */ { struct pipe_rasterizer_state setup; - const struct cso_rasterizer *cso; memset(&setup, 0, sizeof(setup)); if (ctx->Scissor.Enabled) setup.scissor = 1; - cso = st_cached_rasterizer_state(ctx->st, &setup); - pipe->bind_rasterizer_state(pipe, cso->data); + + cso_set_rasterizer(cso, &setup); } /* fragment shader state: TEX lookup program */ - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); /* vertex shader state: position + texcoord pass-through */ - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); + /* texture sampling state: */ { struct pipe_sampler_state sampler; - const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; @@ -683,8 +678,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = 1; - cso = st_cached_sampler_state(ctx->st, &sampler); - pipe->bind_sampler_state(pipe, unit, cso->data); + + cso_single_sampler(cso, 0, &sampler); + cso_single_sampler_done(cso); } /* viewport state: viewport matching window dims */ @@ -705,7 +701,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* texture state: */ { - pipe->set_sampler_texture(pipe, unit, pt); + pipe->set_sampler_textures(pipe, 1, &pt); } /* Compute window coords (y=0=bottom) with pixel zoom. @@ -724,12 +720,25 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, draw_quad(ctx, x0, y0, z, x1, y1, invertTex); /* restore GL state */ - pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data); - pipe->bind_fs_state(pipe, ctx->st->state.fs->data); - pipe->bind_vs_state(pipe, ctx->st->state.vs->cso->data); - pipe->set_sampler_texture(pipe, unit, ctx->st->state.sampler_texture[unit]); - pipe->bind_sampler_state(pipe, unit, ctx->st->state.sampler[unit]->data); + pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, + ctx->st->state.sampler_texture); + pipe->set_viewport_state(pipe, &ctx->st->state.viewport); + +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); +#else + /* restore state */ + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); + cso_set_rasterizer(cso, &st->state.rasterizer); + cso_set_samplers(cso, PIPE_MAX_SAMPLERS, + (const struct pipe_sampler_state **) st->state.sampler_list); +#endif } @@ -797,10 +806,10 @@ compatible_formats(GLenum format, GLenum type, enum pipe_format pipeFormat) static GLboolean any_fragment_ops(const struct st_context *st) { - if (st->state.depth_stencil->state.alpha.enabled || - st->state.blend->state.blend_enable || - st->state.blend->state.logicop_enable || - st->state.depth_stencil->state.depth.enabled) + if (st->state.depth_stencil.alpha.enabled || + st->state.depth_stencil.depth.enabled || + st->state.blend.blend_enable || + st->state.blend.logicop_enable) /* XXX more checks */ return GL_TRUE; else diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 61d4f4c41c..4dc76f19b1 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -168,11 +168,13 @@ static void st_program_string_notify( GLcontext *ctx, stfp->serialNo++; +#if 0 if (stfp->cso) { /* free the TGSI code */ // cso_delete(stfp->vs); stfp->cso = NULL; } +#endif stfp->param_state = stfp->Base.Base.Parameters->StateFlags; @@ -184,13 +186,14 @@ static void st_program_string_notify( GLcontext *ctx, stvp->serialNo++; +#if 0 if (stvp->cso) { /* free the CSO data */ st->pipe->delete_vs_state(st->pipe, stvp->cso->data); FREE((void *) stvp->cso); stvp->cso = NULL; } - +#endif if (stvp->draw_shader) { draw_delete_vertex_shader(st->draw, stvp->draw_shader); stvp->draw_shader = NULL; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 1ba3173312..d4731c7737 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -529,7 +529,8 @@ st_TexImage(GLcontext * ctx, texImage->RowStride = postConvWidth; } - assert(texImage->RowStride == postConvWidth); + /* we'll set RowStride elsewhere when the texture is a "mapped" state */ + /*assert(texImage->RowStride == postConvWidth);*/ } /* Release the reference to a potentially orphaned buffer. diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 09e389f9dc..5458ab420e 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -56,6 +56,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" /** @@ -78,6 +79,7 @@ void st_invalidate_state(GLcontext * ctx, GLuint new_state) static struct st_context * st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) { + uint i; struct st_context *st = CALLOC_STRUCT( st_context ); ctx->st = st; @@ -93,12 +95,15 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st->dirty.mesa = ~0; st->dirty.st = ~0; - st->cache = cso_cache_create(); + st->cso_context = cso_create_context(pipe); st_init_atoms( st ); st_init_draw( st ); st_init_generate_mipmap(st); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + st->state.sampler_list[i] = &st->state.samplers[i]; + /* we want all vertex data to be placed in buffer objects */ vbo_use_buffer_objects(ctx); @@ -149,7 +154,7 @@ static void st_destroy_context_priv( struct st_context *st ) _vbo_DestroyContext(st->ctx); - cso_cache_delete( st->cache ); + cso_destroy_context(st->cso_context); _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 1fbf9721e7..e81aebba3d 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -74,14 +74,11 @@ struct st_context * Other state is just parameter values. */ struct { - const struct cso_alpha_test *alpha_test; - const struct cso_blend *blend; - const struct cso_sampler *sampler[PIPE_MAX_SAMPLERS]; - const struct cso_depth_stencil_alpha *depth_stencil; - const struct cso_rasterizer *rasterizer; - const struct cso_fragment_shader *fs; - struct st_vertex_program *vs; - + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[2]; @@ -90,6 +87,9 @@ struct st_context struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; + + GLuint num_samplers; + GLuint num_textures; } state; struct { @@ -148,6 +148,10 @@ struct st_context /** For gen/render mipmap feature */ struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + void *blend_cso; void *depthstencil_cso; void *rasterizer_cso; @@ -155,7 +159,7 @@ struct st_context struct st_vertex_program *stvp; } gen_mipmap; - struct cso_cache *cache; + struct cso_context *cso_context; }; diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 5888bcb98a..9c13010da8 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -53,15 +53,15 @@ st_print_current(void) int i; printf("Vertex Transform Inputs:\n"); - for (i = 0; i < st->state.vs->cso->state.num_inputs; i++) { + for (i = 0; i < st->vp->state.num_inputs; i++) { printf(" Slot %d: VERT_ATTRIB_%d\n", i, st->vp->index_to_input[i]); } - tgsi_dump( st->state.vs->cso->state.tokens, 0 ); + tgsi_dump( st->vp->state.tokens, 0 ); if (st->vp->Base.Base.Parameters) _mesa_print_parameter_list(st->vp->Base.Base.Parameters); - tgsi_dump( st->state.fs->state.tokens, 0 ); + tgsi_dump( st->fp->state.tokens, 0 ); if (st->fp->Base.Base.Parameters) _mesa_print_parameter_list(st->fp->Base.Base.Parameters); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 1c0fa8c6aa..98504e46c1 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -36,7 +36,6 @@ #include "vbo/vbo.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" @@ -219,7 +218,7 @@ st_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &ctx->st->vp->state; /* loop over TGSI shader inputs to determine vertex buffer * and attribute info @@ -481,10 +480,10 @@ st_feedback_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &st->vp->state; - if (!st->state.vs->draw_shader) { - st->state.vs->draw_shader = draw_create_vertex_shader(draw, vs); + if (!st->vp->draw_shader) { + st->vp->draw_shader = draw_create_vertex_shader(draw, vs); } /* @@ -496,8 +495,8 @@ st_feedback_draw_vbo(GLcontext *ctx, assert(draw); draw_set_viewport_state(draw, &st->state.viewport); draw_set_clip_state(draw, &st->state.clip); - draw_set_rasterizer_state(draw, &st->state.rasterizer->state); - draw_bind_vertex_shader(draw, st->state.vs->draw_shader); + draw_set_rasterizer_state(draw, &st->state.rasterizer); + draw_bind_vertex_shader(draw, st->vp->draw_shader); set_feedback_vertex_format(ctx); /* loop over TGSI shader inputs to determine vertex buffer diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 841d77abbc..9c4e1032ef 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -38,6 +38,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" #include "st_draw.h" @@ -89,8 +90,7 @@ make_tex_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -118,6 +118,7 @@ st_init_generate_mipmap(struct st_context *st) blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.colormask = PIPE_MASK_RGBA; + st->gen_mipmap.blend = blend; st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend); memset(&depthstencil, 0, sizeof(depthstencil)); @@ -257,14 +258,14 @@ st_render_mipmap(struct st_context *st, sampler.normalized_coords = 1; - /* bind CSOs */ - pipe->bind_blend_state(pipe, st->gen_mipmap.blend_cso); - pipe->bind_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); - pipe->bind_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); + /* bind state */ + cso_set_blend(st->cso_context, &st->gen_mipmap.blend); + cso_set_depth_stencil_alpha(st->cso_context, &st->gen_mipmap.depthstencil); + cso_set_rasterizer(st->cso_context, &st->gen_mipmap.rasterizer); /* bind shaders */ - pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->cso->data); - pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->cso->data); + pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->driver_shader); + pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->driver_shader); /* * XXX for small mipmap levels, it may be faster to use the software @@ -284,7 +285,7 @@ st_render_mipmap(struct st_context *st, */ sampler.min_lod = sampler.max_lod = srcLevel; sampler_cso = pipe->create_sampler_state(pipe, &sampler); - pipe->bind_sampler_state(pipe, 0, sampler_cso); + pipe->bind_sampler_states(pipe, 1, &sampler_cso); simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); @@ -293,7 +294,7 @@ st_render_mipmap(struct st_context *st, * the right mipmap level. */ /*pt->first_level = srcLevel;*/ - pipe->set_sampler_texture(pipe, 0, pt); + pipe->set_sampler_textures(pipe, 1, &pt); draw_quad(st->ctx); @@ -304,16 +305,18 @@ st_render_mipmap(struct st_context *st, /*pt->first_level = first_level_save;*/ /* restore pipe state */ - if (st->state.rasterizer) - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); - if (st->state.fs) - pipe->bind_fs_state(pipe, st->state.fs->data); - if (st->state.vs) - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - if (st->state.sampler[0]) - pipe->bind_sampler_state(pipe, 0, st->state.sampler[0]->data); - pipe->set_sampler_texture(pipe, 0, st->state.sampler_texture[0]); +#if 0 + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + cso_set_samplers(st->cso_context, st->state.samplers_list); + pipe->bind_fs_state(pipe, st->fp->shader_program); + pipe->bind_vs_state(pipe, st->vp->shader_program); + pipe->set_sampler_textures(pipe, st->state.num_textures, + st->state.sampler_texture); pipe->set_viewport_state(pipe, &st->state.viewport); +#else + /* XXX is this sufficient? */ + st_invalidate_state(st->ctx, _NEW_COLOR | _NEW_TEXTURE); +#endif return TRUE; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 97206752af..f3cfda0bfb 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -513,10 +513,9 @@ compile_instruction( case OPCODE_TXP: /* texture lookup with divide by Q component */ /* convert to TEX w/ special flag for division */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TEX; + fullinst->Instruction.Opcode = TGSI_OPCODE_TXP; fullinst->Instruction.NumSrcRegs = 2; fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget ); - fullinst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide = TGSI_EXTSWIZZLE_W; fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; break; @@ -683,8 +682,9 @@ find_temporaries(const struct gl_program *program, * \param tokens array to store translated tokens in * \param maxTokens size of the tokens array * + * \return number of tokens placed in 'tokens' buffer, or zero if error */ -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, @@ -918,6 +918,6 @@ tgsi_translate_mesa_program( maxTokens - ti ); } - return GL_TRUE; + return ti; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 3ababf1339..63fc855b53 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -39,7 +39,7 @@ extern "C" { struct tgsi_token; struct gl_program; -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index aa252c845a..0f8784e132 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -42,15 +42,29 @@ #include "tgsi/util/tgsi_dump.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "cso_cache/cso_context.h" #define TGSI_DEBUG 0 +/** XXX we should use the version of this from p_util.h but including + * that header causes symbol collisions. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + + /** * Translate a Mesa vertex shader into a TGSI shader. * \param outputMapping to map vertex program output registers to TGSI @@ -61,15 +75,15 @@ void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *stvp, - const GLuint outputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint outputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint defaultOutputMapping[VERT_RESULT_MAX]; struct pipe_shader_state vs; - const struct cso_vertex_shader *cso; GLuint attr, i; GLuint num_generic = 0; + GLuint num_tokens; memset(&vs, 0, sizeof(vs)); @@ -240,7 +254,7 @@ st_translate_vertex_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, &stvp->Base.Base, /* inputs */ vs.num_inputs, @@ -252,20 +266,21 @@ st_translate_vertex_program(struct st_context *st, vs.num_outputs, outputMapping, vs.output_semantic_name, - vs.output_semantic_index, + vs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - vs.tokens = tokensOut; + vs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_vs_state(st, &vs); - stvp->cso = cso; + stvp->state = vs; /* struct copy */ + stvp->driver_shader = pipe->create_vs_state(pipe, &vs); if (0) _mesa_print_program(&stvp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0 ); + tgsi_dump( vs.tokens, 0 ); } @@ -280,10 +295,10 @@ st_translate_vertex_program(struct st_context *st, const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp, - const GLuint inputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint inputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint outputMapping[FRAG_RESULT_MAX]; GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; struct pipe_shader_state fs; @@ -293,6 +308,7 @@ st_translate_fragment_program(struct st_context *st, const GLbitfield inputsRead = stfp->Base.Base.InputsRead; GLuint vslot = 0; GLuint num_generic = 0; + GLuint num_tokens; memset(&fs, 0, sizeof(fs)); @@ -401,7 +417,7 @@ st_translate_fragment_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, &stfp->Base.Base, /* inputs */ fs.num_inputs, @@ -415,18 +431,19 @@ st_translate_fragment_program(struct st_context *st, fs.output_semantic_name, fs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - fs.tokens = tokensOut; + fs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_fs_state(st, &fs); - stfp->cso = cso; + stfp->state = fs; /* struct copy */ + stfp->driver_shader = pipe->create_fs_state(pipe, &fs); if (0) _mesa_print_program(&stfp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ ); + tgsi_dump( fs.tokens, 0/*TGSI_DUMP_VERBOSE*/ ); return cso; } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 31558af6ce..78786dcbb6 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -60,11 +60,8 @@ struct st_fragment_program /** map FP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_fragment_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; GLuint param_state; @@ -88,11 +85,8 @@ struct st_vertex_program /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_vertex_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; /** For using our private draw module (glRasterPos) */ struct draw_vertex_shader *draw_shader; @@ -122,16 +116,12 @@ st_vertex_program( struct gl_vertex_program *vp ) extern const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *fp, - const GLuint inputMapping[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint inputMapping[]); extern void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *vp, - const GLuint vert_output_to_slot[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint vert_output_to_slot[]); #endif diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index b7f4d8a307..35dc0e768f 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -309,16 +309,23 @@ static void vbo_exec_fixup_vertex( GLcontext *ctx, GLuint attr, GLuint sz ) { struct vbo_exec_context *exec = &vbo_context(ctx)->exec; + static const GLfloat id[4] = { 0, 0, 0, 1 }; int i; - if (sz > exec->vtx.attrsz[attr]) { + if (exec->vtx.prim_count == 0) { + GLfloat *current = (GLfloat *)vbo_context(ctx)->currval[attr].Ptr; + exec->vtx.attrptr[attr] = current; + memcpy(current, id, sizeof(id)); + ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; + return; + } + else if (sz > exec->vtx.attrsz[attr]) { /* New size is larger. Need to flush existing vertices and get * an enlarged vertex format. */ vbo_exec_wrap_upgrade_vertex( exec, attr, sz ); } else if (sz < exec->vtx.active_sz[attr]) { - static const GLfloat id[4] = { 0, 0, 0, 1 }; /* New size is smaller - just need to fill in some * zeros. Don't need to flush or wrap. |