diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2008-03-13 13:39:05 +1100 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2008-03-13 13:39:05 +1100 |
commit | 03ec66375889f049b09f39ba98515aa35ac48164 (patch) | |
tree | e149d8defb52b1252af982b459902ee0800b17cd /src/gallium/auxiliary | |
parent | bd4fe0e87c1b979973d9a76aa48de5fbbb8d52b7 (diff) | |
parent | 2366bb1baf2e9ae5b6ecf19f66ae9e0a4b0d2f36 (diff) |
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/auxiliary')
54 files changed, 4511 insertions, 261 deletions
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 3e49266163..6bd6602088 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = cso_cache C_SOURCES = \ + cso_context.c \ cso_cache.c \ cso_hash.c diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript index 9751881613..651e68a191 100644 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -3,6 +3,7 @@ Import('*') cso_cache = env.ConvenienceLibrary( target = 'cso_cache', source = [ + 'cso_context.c', 'cso_cache.c', 'cso_hash.c', ]) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index b427b509f8..a2764b4265 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -207,8 +207,11 @@ static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type { /* if we're approach the maximum size, remove fourth of the entries * otherwise every subsequent call will go through the same */ - int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash); + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; int to_remove = (max_size < max_entries) * max_entries/4; + if (hash_size > max_size) + to_remove += hash_size - max_size; while (to_remove) { /*remove elements until we're good */ /*fixme: currently we pick the nodes to remove at random*/ diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 44ee128a4a..e5edbbb556 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -84,47 +84,49 @@ extern "C" { #endif +typedef void (*cso_state_callback)(void *ctx, void *obj); + struct cso_cache; struct cso_blend { struct pipe_blend_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; @@ -138,8 +140,6 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; -typedef void (*cso_state_callback)(void *, void *); - unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c new file mode 100644 index 0000000000..f7f4aebb16 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -0,0 +1,354 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. + * + * Authors: + * Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *blend; + void *depth_stencil; + void *rasterizer; + void *fragment_shader; + void *vertex_shader; +}; + + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + + ctx->pipe = pipe; + + /* Enable for testing: */ + if (0) cso_set_maximum_cache_size( ctx->cache, 4 ); + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + +static void cso_release_all( struct cso_context *ctx ) +{ + if (ctx->pipe) { + ctx->pipe->bind_blend_state( ctx->pipe, NULL ); + ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); + ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); + ctx->pipe->bind_fs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + } + + if (ctx->cache) { + cso_cache_delete( ctx->cache ); + ctx->cache = NULL; + } +} + + +void cso_destroy_context( struct cso_context *ctx ) +{ + debug_printf("%s\n", __FUNCTION__); + + if (ctx) + cso_release_all( ctx ); + + FREE( ctx ); +} + + +/* Those function will either find the state of the given template + * in the cache or they will create a new state from the given + * template, insert it in the cache and return it. + */ + +/* + * If the driver returns 0 from the create method then they will assign + * the data member of the cso to be the template itself. + */ + +void cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_BLEND, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + + cso->state = *templ; + cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + handle = cso->data; + } + else { + handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; + } + + if (ctx->blend != handle) { + ctx->blend = handle; + ctx->pipe->bind_blend_state(ctx->pipe, handle); + } +} + +void cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + + cso->state = *templ; + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->samplers[idx] = handle; +} + +void cso_single_sampler_done( struct cso_context *ctx ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + if (ctx->samplers[i] == NULL) + break; + + ctx->nr_samplers = i; + + if (ctx->hw.nr_samplers != ctx->nr_samplers || + memcmp(ctx->hw.samplers, + ctx->samplers, + ctx->nr_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); + ctx->hw.nr_samplers = ctx->nr_samplers; + + ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) +{ + unsigned i; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) + cso_single_sampler( ctx, i, templates[i] ); + + for ( ; i < ctx->nr_samplers; i++) + cso_single_sampler( ctx, i, NULL ); + + cso_single_sampler_done( ctx ); +} + +void cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_depth_stencil_alpha_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + + cso->state = *templ; + cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + handle = cso->data; + } + else { + handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; + } + + if (ctx->depth_stencil != handle) { + ctx->depth_stencil = handle; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + } +} + + + +void cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_rasterizer_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_RASTERIZER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + + cso->state = *templ; + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } +} + + + + + +void cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_FRAGMENT_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + +void cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_VERTEX_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h new file mode 100644 index 0000000000..1f2a630804 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CSO_CONTEXT_H +#define CSO_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_context; + +struct cso_context *cso_create_context( struct pipe_context *pipe ); + +void cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); + +void cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); + +void cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); + +void cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); + +/* Alternate interface to support state trackers that like to modify + * samplers one at a time: + */ +void cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); + +void cso_single_sampler_done( struct cso_context *cso ); + + +/* These aren't really sensible -- most of the time the api provides + * object semantics for shaders anyway, and the cases where it doesn't + * (eg mesa's internall-generated texenv programs), it will be up to + * the state tracker to implement their own specialized caching. + */ +void cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_destroy_context( struct cso_context *cso ); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index b3b4d667d2..5cad5d3be7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -101,13 +101,6 @@ static void *cso_data_allocate_node(struct cso_hash_data *hash) static void cso_data_free_node(struct cso_node *node) { - /* XXX still a leak here. - * Need to cast value ptr to original cso type, then free the - * driver-specific data hanging off of it. For example: - struct cso_sampler *csamp = (struct cso_sampler *) node->value; - FREE(csamp->data); - */ - FREE(node->value); FREE(node); } diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index d5bca9d591..84b45a5963 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -25,6 +25,16 @@ * **************************************************************************/ +/** + This file provides a hash implementation that is capable of dealing + with collisions. It stores colliding entries in linked list. All + functions operating on the hash return an iterator. The iterator + itself points to the collision list. If there wasn't any collision + the list will have just one entry, otherwise client code should + iterate over the entries to find the exact entry among ones that + had the same key (e.g. memcmp could be used on the data to check + that) +*/ /* * Authors: * Zack Rusin <zack@tungstengraphics.com> diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index c18dcb2927..5cb7664c85 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -16,6 +16,7 @@ draw = env.ConvenienceLibrary( 'draw_offset.c', 'draw_prim.c', 'draw_pstipple.c', + 'draw_passthrough.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 7660e56fe6..6b1e640ae9 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -78,7 +78,8 @@ struct aaline_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* @@ -98,11 +99,10 @@ struct aaline_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); - - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); struct pipe_context *pipe; }; @@ -607,6 +607,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) auto struct aaline_stage *aaline = aaline_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = aaline->pipe; + uint num = MAX2(aaline->num_textures, aaline->num_samplers); assert(draw->rasterizer->line_smooth); @@ -624,8 +625,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ bind_aaline_fragment_shader(aaline); - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + aaline->state.sampler[num] = aaline->sampler_cso; + aaline->state.texture[num] = aaline->texture; + + aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); /* now really draw first line */ stage->line = aaline_line; @@ -647,10 +651,10 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); /* XXX restore original texture, sampler state */ - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, - aaline->state.sampler[aaline->sampler_unit]); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, - aaline->state.texture[aaline->sampler_unit]); + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); draw->extra_vp_outputs.slot = 0; } @@ -729,7 +733,8 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aaline->fs = aafs; /* pass-through */ - aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? aafs->driver_fs : NULL)); } @@ -745,26 +750,28 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs) static void -aaline_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.sampler[unit] = sampler; + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; /* pass-through */ - aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } static void -aaline_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.texture[sampler] = texture; + memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *)); + aaline->num_textures = num; /* pass-through */ - aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); } @@ -798,14 +805,14 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_state = pipe->bind_sampler_state; - aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_state = aaline_bind_sampler_state; - pipe->set_sampler_texture = aaline_set_sampler_texture; + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 70f696475f..99e9e9fe34 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -800,7 +800,8 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aapoint->fs = aafs; /* pass-through */ - aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 428b6209e0..fed2b6e759 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,6 +34,7 @@ #include "pipe/p_util.h" #include "draw_context.h" #include "draw_private.h" +#include "draw_vbuf.h" @@ -114,6 +115,13 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + + /* Not so fast -- we're just borrowing this at the moment. + * + if (draw->render) + draw->render->destroy( draw->render ); + */ + FREE( draw ); } @@ -349,3 +357,10 @@ void draw_reset_vertex_ids(struct draw_context *draw) draw_vertex_cache_reset_vertex_ids(draw); } + + +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) +{ + draw->render = render; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ab87b4127c..df63e91a22 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -168,4 +168,9 @@ unsigned draw_trim_prim( unsigned mode, unsigned count ); + +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c new file mode 100644 index 0000000000..a51fa0ab23 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +/* This code is a prototype of what a passhthrough vertex shader might + * look like. + * + * Probably the best approach for us is to do: + * - vertex fetch + * - vertex shader + * - cliptest / viewport transform + * + * in one step, then examine the clipOrMask & choose between two paths: + * + * Either: + * - build primitive headers + * - clip and the primitive path + * - build clipped vertex buffers, + * - vertex-emit to vbuf buffers + * + * Or, if no clipping: + * - vertex-emit directly to vbuf buffers + * + * But when bypass clipping is enabled, we just take the latter + * choice. If (some new) passthrough-vertex-shader flag is also set, + * the pipeline degenerates to: + * + * - vertex fetch + * - vertex emit to vbuf buffers + * + * Which is what is prototyped here. + */ +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" + + + +/* Example of a fetch/emit passthrough shader which could be + * generated when bypass_clipping is enabled on a passthrough vertex + * shader. + */ +static void fetch_xyz_rgb_st( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + unsigned i; + + const ubyte *xyzw = src[0] + start * pitch[0]; + const ubyte *rgba = src[1] + start * pitch[1]; + const ubyte *st = src[2] + start * pitch[2]; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyzw; xyzw += pitch[0]; + /* decode input, encode output. Assume both are float[4] */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + } + + { + const float *in = (const float *)rgba; rgba += pitch[1]; + /* decode input, encode output. Assume both are float[4] */ + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = in[3]; + } + + { + const float *in = (const float *)st; st += pitch[2]; + /* decode input, encode output. Assume both are float[2] */ + out[8] = in[0]; + out[9] = in[1]; + } + + out += 10; + } +} + + +static boolean update_shader( struct draw_context *draw ) +{ + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + + unsigned nr_attrs = vinfo->num_attribs; + unsigned i; + + for (i = 0; i < nr_attrs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + + draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset; + + draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; + draw->vertex_fetch.fetch[i] = NULL; + } + + draw->vertex_fetch.nr_attrs = nr_attrs; + draw->vertex_fetch.fetch_func = NULL; + draw->vertex_fetch.pt_fetch = NULL; + + draw->pt.hw_vertex_size = vinfo->size * 4; + + /* Just trying to figure out how this would work: + */ + if (nr_attrs == 3 && + 0 /* some other tests */) + { + draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; + assert(vinfo->size == 10); + return TRUE; + } + + return FALSE; +} + + + +static boolean set_prim( struct draw_context *draw, + unsigned prim ) +{ + assert(!draw->user.elts); + + draw->pt.prim = prim; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return FALSE; + default: + draw->render->set_primitive( draw->render, prim ); + return TRUE; + } +} + + + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + float *hw_verts; + + if (!set_prim(draw, prim)) + return FALSE; + + if (!update_shader( draw )) + return FALSE; + + hw_verts = draw->render->allocate_vertices( draw->render, + draw->pt.hw_vertex_size, + count ); + + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done on hardware. + */ + draw->vertex_fetch.pt_fetch( draw, + hw_verts, + start, count ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + start, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + draw->pt.hw_vertex_size, + count ); + + return TRUE; +} + diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c732d723a7..4147472d45 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -133,7 +133,7 @@ struct draw_vertex_shader { /* This member will disappear shortly: */ - const struct pipe_shader_state *state; + struct pipe_shader_state state; struct tgsi_shader_info info; @@ -162,8 +162,14 @@ typedef void (*full_fetch_func)( struct draw_context *draw, const unsigned *elts, unsigned count ); +typedef void (*pt_fetch_func)( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ); +struct vbuf_render; + /** * Private context for the drawing module. */ @@ -191,6 +197,17 @@ struct draw_context struct draw_stage *rasterize; } pipeline; + + struct vbuf_render *render; + + /* Support prototype passthrough path: + */ + struct { + unsigned prim; + unsigned hw_vertex_size; + } pt; + + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; @@ -244,6 +261,7 @@ struct draw_context fetch_func fetch[PIPE_ATTRIB_MAX]; unsigned nr_attrs; full_fetch_func fetch_func; + pt_fetch_func pt_fetch; } vertex_fetch; /* Post-tnl vertex cache: @@ -331,6 +349,15 @@ struct tgsi_exec_machine; extern void draw_update_vertex_fetch( struct draw_context *draw ); +/* Prototype/hack + */ +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count); + + #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ #define DRAW_FLUSH_PRIM_QUEUE 0x2 #define DRAW_FLUSH_VERTEX_CACHE 0x4 diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 2cfeb813b3..8b3e84a9a0 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -67,16 +67,18 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; - struct pipe_texture *texture; uint sampler_unit; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; /* * Currently bound state */ struct pstip_fragment_shader *fs; struct { - void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + void *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; const struct pipe_poly_stipple *stipple; } state; @@ -88,11 +90,10 @@ struct pstip_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -484,18 +485,25 @@ static void pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); - struct draw_context *draw = stage->draw; struct pipe_context *pipe = pstip->pipe; + uint num_samplers; - assert(draw->rasterizer->poly_stipple_enable); + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); - /* - * Bind our fragprog, sampler and texture - */ + assert(stage->draw->rasterizer->poly_stipple_enable); + + /* bind our fragprog */ bind_pstip_fragment_shader(pstip); - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + /* plug in our sampler, texture */ + pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; + pstip->state.textures[pstip->sampler_unit] = pstip->texture; + + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); /* now really draw first line */ stage->tri = passthrough_tri; @@ -517,10 +525,10 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); /* XXX restore original texture, sampler state */ - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, - pstip->state.sampler[pstip->sampler_unit]); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, - pstip->state.texture[pstip->sampler_unit]); + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.textures); } @@ -597,7 +605,8 @@ pstip_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ pstip->fs = aafs; /* pass-through */ - pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? aafs->driver_fs : NULL)); } @@ -613,26 +622,28 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs) static void -pstip_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.sampler[unit] = sampler; + memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); + pstip->num_samplers = num; /* pass-through */ - pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); } static void -pstip_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.texture[sampler] = texture; + memcpy(pstip->state.textures, texture, num * sizeof(struct pipe_texture *)); + pstip->num_textures = num; /* pass-through */ - pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); } @@ -682,8 +693,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_state = pipe->bind_sampler_state; - pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -691,7 +702,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_state = pstip_bind_sampler_state; - pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; } diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 4d718d514c..b07860cd9e 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -129,7 +129,7 @@ static void unfilled_tri( struct draw_stage *stage, points( stage, header ); break; default: - abort(); + assert(0); } } diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index cfd2b9820c..5e7de905c1 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -85,6 +85,12 @@ struct vbuf_render { const ushort *indices, uint nr_indices ); + /* Draw Arrays path too. + */ + void (*draw_arrays)( struct vbuf_render *, + unsigned start, + uint nr ); + /** * Called when vbuf is done with this set of vertices: */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 53f8bbec44..161b247d4e 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,11 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); -// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); + /* There's an error somewhere in the vcache code that requires this + * memset. The bug is exposed in q3demo demo001, but probably + * elsewhere as well. Will track it down later. + */ + memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index cb8cdd04a3..b56d85396d 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -54,7 +54,7 @@ fetch_##NAME(const void *ptr, float *attrib) \ int i; \ \ for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT; \ + attrib[i] = CVT(i); \ } \ \ for (; i < 4; i++) { \ @@ -62,24 +62,24 @@ fetch_##NAME(const void *ptr, float *attrib) \ } \ } -#define CVT_64_FLOAT (float) ((double *) ptr)[i] -#define CVT_32_FLOAT ((float *) ptr)[i] +#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] +#define CVT_32_FLOAT(i) ((float *) ptr)[i] -#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] +#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] -#define CVT_8_SSCALED (float) ((char *) ptr)[i] -#define CVT_16_SSCALED (float) ((short *) ptr)[i] -#define CVT_32_SSCALED (float) ((int *) ptr)[i] +#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] +#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] +#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] -#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f +#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f -#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f +#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) @@ -156,6 +156,16 @@ FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = CVT_8_UNORM(0); + attrib[1] = CVT_8_UNORM(1); + attrib[0] = CVT_8_UNORM(2); + attrib[3] = CVT_8_UNORM(3); +} + + static fetch_func get_fetch_func( enum pipe_format format ) { #if 0 @@ -296,6 +306,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) case PIPE_FORMAT_A8R8G8B8_UNORM: return fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return fetch_B8G8R8A8_UNORM; + case 0: return NULL; /* not sure why this is needed */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 1e95355555..133418baca 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -110,13 +110,20 @@ draw_bind_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + if (dvs) + { + draw->vertex_shader = dvs; + draw->num_vs_outputs = dvs->info.num_outputs; - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; + tgsi_exec_machine_init(&draw->machine); - tgsi_exec_machine_init(&draw->machine); - - dvs->prepare( dvs, draw ); + dvs->prepare( dvs, draw ); + } + else { + draw->vertex_shader = NULL; + draw->num_vs_outputs = 0; + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 583812aadd..55bec14116 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -71,7 +71,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { /* specify the vertex program to interpret/execute */ tgsi_exec_machine_bind_shader(&draw->machine, - shader->state->tokens, + shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); @@ -132,20 +132,30 @@ vs_exec_run( struct draw_vertex_shader *shader, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. @@ -177,7 +187,7 @@ draw_create_vs_exec(struct draw_context *draw, if (vs == NULL) return NULL; - vs->state = state; + vs->state = *state; vs->prepare = vs_exec_prepare; vs->run = vs_exec_run; vs->delete = vs_exec_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 0fd557d667..53c260be53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -135,25 +135,35 @@ vs_llvm_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0b8bc2bf14..5ee2adb344 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,20 +158,30 @@ vs_sse_run( struct draw_vertex_shader *base, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. @@ -211,14 +221,14 @@ draw_create_vs_sse(struct draw_context *draw, if (vs == NULL) return NULL; - vs->base.state = templ; + vs->base.state = *templ; vs->base.prepare = vs_sse_prepare; vs->base.run = vs_sse_run; vs->base.delete = vs_sse_delete; x86_init_func( &vs->sse2_program ); - if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens, + if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, &vs->sse2_program )) goto fail; diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index 39fac6ea4a..c24e19e062 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -15,7 +15,7 @@ GALLIVM_SOURCES = \ storagesoa.cpp \ instructionssoa.cpp -INC_SOURCES = gallivm_builtins.cpp +INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp CPP_SOURCES = \ $(GALLIVM_SOURCES) @@ -65,8 +65,10 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins +gallivmsoabuiltins.cpp: soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins # Emacs tags tags: @@ -78,6 +80,7 @@ clean: -rm -f *.o */*.o *~ *.so *~ server/*.o -rm -f depend depend.bak -rm -f gallivm_builtins.cpp + -rm -f gallivmsoabuiltins.cpp symlinks: diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index d14bb3b99a..b6f641a3f8 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -306,11 +306,19 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) { struct gallivm_prog *prog = (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + + std::cout << "Before optimizations:"<<std::endl; + ir->module->dump(); + std::cout<<"-------------------------------"<<std::endl; + + PassManager veri; + veri.add(createVerifierPass()); + veri.run(*ir->module); llvm::Module *mod = llvm::CloneModule(ir->module); prog->num_consts = ir->num_consts; memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); prog->num_interp = ir->num_interp; - + /* Run optimization passes over it */ PassManager passes; passes.add(new TargetData(mod)); diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 57912a952f..b4d6555d2f 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -33,6 +33,16 @@ #ifndef GALLIVM_H #define GALLIVM_H +/* + LLVM representation consists of two stages - layout independent + intermediate representation gallivm_ir and driver specific + gallivm_prog. TGSI is first being translated into gallivm_ir + after that driver can set number of options on gallivm_ir and + have it compiled into gallivm_prog. gallivm_prog can be either + executed (assuming there's LLVM JIT backend for the current + target) or machine code generation can be done (assuming there's + a LLVM code generator for thecurrent target) + */ #if defined __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 55d39fa5f1..8919491792 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -42,6 +42,7 @@ #include <llvm/InstrTypes.h> #include <llvm/Instructions.h> #include <llvm/ParameterAttributes.h> +#include <llvm/ParamAttrsList.h> #include <sstream> #include <fstream> @@ -51,6 +52,15 @@ using namespace llvm; #include "gallivm_builtins.cpp" +#if 0 + +llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); +} +#endif + static inline std::string createFuncName(int label) { std::ostringstream stream; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a4d5046637..89d513afd0 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -2,9 +2,28 @@ #include "storagesoa.h" +#include "pipe/p_shader_tokens.h" + +#include <llvm/CallingConv.h> #include <llvm/Constants.h> +#include <llvm/Module.h> +#include <llvm/Function.h> +#include <llvm/Instructions.h> +#include <llvm/Transforms/Utils/Cloning.h> +#include <llvm/ParamAttrsList.h> + +#include <iostream> + +/* disable some warnings. this file is autogenerated */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif using namespace llvm; +#include "gallivmsoabuiltins.cpp" +#if defined(__GNUC__) +#pragma GCC diagnostic warning "-Wunused-variable" +#endif InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, StorageSoa *storage) @@ -12,6 +31,8 @@ InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, m_storage(storage), m_idx(0) { + createFunctionMap(); + createBuiltins(); } const char * InstructionsSoa::name(const char *prefix) const @@ -119,3 +140,167 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) return res; } + +void InstructionsSoa::createFunctionMap() +{ + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; +} + +llvm::Function * InstructionsSoa::function(int op) +{ + if (m_functions.find(op) != m_functions.end()) + return m_functions[op]; + + std::string name = m_functionsMap[op]; + + llvm::Function *originalFunc = m_builtins->getFunction(name); + llvm::Function *func = CloneFunction(originalFunc); + currentModule()->getFunctionList().push_back(func); + std::cout << "Func parent is "<<func->getParent() + <<", cur is "<<currentModule() <<std::endl; + func->dump(); + //func->setParent(currentModule()); + m_functions[op] = func; + return func; +} + +llvm::Module * InstructionsSoa::currentModule() const +{ + BasicBlock *block = m_builder.GetInsertBlock(); + if (!block || !block->getParent()) + return 0; + + return block->getParent()->getParent(); +} + +void InstructionsSoa::createBuiltins() +{ + m_builtins = createSoaBuiltins(); +} + +std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP3); + return callBuiltin(func, in1, in2); +} + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); + + std::vector<Value*> indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *getElem = new GetElementPtrInst(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + return getElem; +} + +std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) +{ + GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); + + std::vector<llvm::Value*> res(4); + res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); + res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); + res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); + res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP4); + return callBuiltin(func, in1, in2); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + params.push_back(in3[0]); + params.push_back(in3[1]); + params.push_back(in3[2]); + params.push_back(in3[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 4169dcbb2e..3ef51dcaff 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -30,6 +30,7 @@ #include <llvm/Support/LLVMBuilder.h> +#include <map> #include <vector> namespace llvm { @@ -47,9 +48,12 @@ public: llvm::BasicBlock *block, StorageSoa *storage); std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); - std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2, const std::vector<llvm::Value*> in3); @@ -62,9 +66,29 @@ private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, llvm::Value *z, llvm::Value *w); + void createFunctionMap(); + void createBuiltins(); + llvm::Function *function(int); + llvm::Module *currentModule() const; + llvm::Value *allocaTemp(); + std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); private: llvm::LLVMFoldingBuilder m_builder; StorageSoa *m_storage; + + std::map<int, std::string> m_functionsMap; + std::map<int, llvm::Function*> m_functions; + llvm::Module *m_builtins; + private: mutable int m_idx; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c new file mode 100644 index 0000000000..24c14e1b69 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * This file is compiled with clang into the LLVM bitcode + * + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; + +void dp3(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + + +void dp4(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z) + (tmp0w * tmp1w); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +#if 0 +void yo(float4 *out, float4 *in) +{ + float4 res[4]; + + dp3(res, in[0], in[1], in[2], in[3], + in[4], in[5], in[6], in[7]); + out[1] = res[1]; +} +#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index ed0674a96f..bb6fe3d7e1 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -277,7 +277,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v return constVector; } -std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, +std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx) { std::vector<llvm::Value*> val(4); @@ -292,25 +292,29 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); switch(type) { - case Input: + case TGSI_FILE_INPUT: val = inputElement(realIndex); break; - case Output: + case TGSI_FILE_OUTPUT: val = outputElement(realIndex); break; - case Temp: + case TGSI_FILE_TEMPORARY: val = tempElement(realIndex); break; - case Const: + case TGSI_FILE_CONSTANT: val = constElement(realIndex); break; - case Immediate: + case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); break; - case Address: + case TGSI_FILE_ADDRESS: debug_printf("Address not handled in the load phase!\n"); assert(0); break; + default: + debug_printf("Unknown load!\n"); + assert(0); + break; } if (!gallivm_is_swizzle(swizzle)) return val; @@ -324,21 +328,21 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle, return res; } -void StorageSoa::store(Argument type, int idx, const std::vector<llvm::Value*> &val, +void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, int mask) { llvm::Value *out = 0; switch(type) { - case Output: + case TGSI_FILE_OUTPUT: out = m_output; break; - case Temp: + case TGSI_FILE_TEMPORARY: out = m_temps; break; - case Input: + case TGSI_FILE_INPUT: out = m_input; break; - case Address: { + case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; if (!addr) { addAddress(idx); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index 6443351f27..ae2fc7c6ae 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -28,6 +28,8 @@ #ifndef STORAGESOA_H #define STORAGESOA_H +#include <pipe/p_shader_tokens.h> + #include <vector> #include <list> #include <map> @@ -46,15 +48,6 @@ namespace llvm { class StorageSoa { public: - enum Argument { - Input, - Output, - Temp, - Const, - Immediate, - Address - }; -public: StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, @@ -62,9 +55,9 @@ public: llvm::Value *temps); - std::vector<llvm::Value*> load(Argument type, int idx, int swizzle, + std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx =0); - void store(Argument type, int idx, const std::vector<llvm::Value*> &val, + void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, int mask); void addImmediate(float *vec); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 2cb4acce32..3f65865a5a 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -708,25 +708,9 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->load(StorageSoa::Const, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->load(StorageSoa::Input, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->load(StorageSoa::Temp, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->load(StorageSoa::Output, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->load(StorageSoa::Immediate, - src->SrcRegister.Index, swizzle, indIdx); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } + + val = storage->load((enum tgsi_file_type)src->SrcRegister.File, + src->SrcRegister.Index, swizzle, indIdx); inputs[i] = val; } @@ -763,9 +747,11 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DST: { @@ -1067,19 +1053,8 @@ translate_instructionir(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->store(StorageSoa::Output, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->store(StorageSoa::Temp, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->store(StorageSoa::Address, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } + storage->store((enum tgsi_file_type)dst->DstRegister.File, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 95a2d6fcbb..a996218ce7 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); p->csr = p->store; + + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. + */ + p->regs[0] = ~7; + p->regs[1] = (1U << (80 - 64)) - 1; } @@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p) } +int spe_allocate_available_register(struct spe_function *p) +{ + unsigned i; + for (i = 0; i < 128; i++) { + const uint64_t mask = (1ULL << (i % 128)); + const unsigned idx = i / 128; + + if ((p->regs[idx] & mask) != 0) { + p->regs[idx] &= ~mask; + return i; + } + } + + return -1; +} + + +int spe_allocate_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) != 0); + + p->regs[idx] &= ~(1ULL << bit); + return reg; +} + + +void spe_release_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) == 0); + + p->regs[idx] |= (1ULL << bit); +} + + + + void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 10ce44b3a0..5a1eb1ed8d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -39,11 +39,27 @@ struct spe_function { uint32_t *store; uint32_t *csr; const char *fn; + + /** + * Mask of used / unused registers + * + * Each set bit corresponds to an available register. Each cleared bit + * corresponds to an allocated register. + * + * \sa + * spe_allocate_register, spe_allocate_available_register, + * spe_release_register + */ + uint64_t regs[2]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern int spe_allocate_available_register(struct spe_function *p); +extern int spe_allocate_register(struct spe_function *p, int reg); +extern void spe_release_register(struct spe_function *p, int reg); + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile new file mode 100644 index 0000000000..516d1756cf --- /dev/null +++ b/src/gallium/auxiliary/sct/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = sct + +C_SOURCES = \ + sct.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript new file mode 100644 index 0000000000..76927d973f --- /dev/null +++ b/src/gallium/auxiliary/sct/SConscript @@ -0,0 +1,9 @@ +Import('*') + +sct = env.ConvenienceLibrary( + target = 'sct', + source = [ + 'sct.c' + ]) + +auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c new file mode 100644 index 0000000000..97ee5882a1 --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.c @@ -0,0 +1,453 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "sct.h" + + +struct texture_list +{ + struct pipe_texture *texture; + struct texture_list *next; +}; + + + +#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) + +struct sct_context +{ + const struct pipe_context *context; + + /** surfaces the context is drawing into */ + struct pipe_surface *surfaces[MAX_SURFACES]; + + /** currently bound textures */ + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + + /** previously bound textures, used but not flushed */ + struct texture_list *textures_used; + + boolean needs_flush; + + struct sct_context *next; +}; + + + +struct sct_surface +{ + const struct pipe_surface *surface; + + /** list of contexts drawing to this surface */ + struct sct_context_list *contexts; + + struct sct_surface *next; +}; + + + +/** + * Find the surface_info for the given pipe_surface + */ +static struct sct_surface * +find_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) + if (si->surface == surface) + return si; + return NULL; +} + + +/** + * As above, but create new surface_info if surface is new. + */ +static struct sct_surface * +find_create_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) + return si; + + /* alloc new */ + si = CALLOC_STRUCT(sct_surface); + if (si) { + si->surface = surface; + + /* insert at head */ + si->next = sct->surfaces; + sct->surfaces = si; + } + + return si; +} + + +/** + * Find a context_info for the given context. + */ +static struct sct_context * +find_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci; + for (ci = sct->contexts; ci; ci = ci->next) + if (ci->context == context) + return ci; + return NULL; +} + + +/** + * As above, but create new context_info if context is new. + */ +static struct sct_context * +find_create_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + if (ci) + return ci; + + /* alloc new */ + ci = CALLOC_STRUCT(sct_context); + if (ci) { + ci->context = context; + + /* insert at head */ + ci->next = sct->contexts; + sct->contexts = ci; + } + + return ci; +} + + +/** + * Is the context already bound to the surface? + */ +static boolean +find_surface_context(const struct sct_surface *si, + const struct pipe_context *context) +{ + const struct sct_context_list *cl; + for (cl = si->contexts; cl; cl = cl->next) { + if (cl->context == context) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add a context to the list of contexts associated with a surface. + */ +static void +add_context_to_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); + if (cl) { + cl->context = context; + /* insert at head of list of contexts */ + cl->next = si->contexts; + si->contexts = cl; + } +} + + +/** + * Remove a context from the list of contexts associated with a surface. + */ +static void +remove_context_from_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *prev = NULL, *curr, *next; + + for (curr = si->contexts; curr; curr = next) { + if (curr->context == context) { + /* remove */ + if (prev) + prev->next = curr->next; + else + si->contexts = curr->next; + next = curr->next; + FREE(curr); + } + else { + prev = curr; + } + } +} + + +/** + * Unbind context from surface. + */ +static void +unbind_context_surface(struct surface_context_tracker *sct, + struct pipe_context *context, + struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) { + remove_context_from_surface(si, context); + } +} + + +/** + * Bind context to a set of surfaces (color + Z). + * Like MakeCurrent(). + */ +void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces) +{ + struct sct_context *ci = find_create_context_info(sct, context); + uint i; + + if (!ci) { + return; /* out of memory */ + } + + /* unbind currently bound surfaces */ + for (i = 0; i < MAX_SURFACES; i++) { + if (ci->surfaces[i]) { + unbind_context_surface(sct, context, ci->surfaces[i]); + } + } + + /* bind new surfaces */ + for (i = 0; i < num_surf; i++) { + struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); + if (!find_surface_context(si, context)) { + add_context_to_surface(si, context); + } + } +} + + +/** + * Return list of contexts bound to a surface. + */ +const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + const struct sct_surface *si = find_surface_info(sct, surface); + return si->contexts; +} + + + +static boolean +find_texture(const struct sct_context *ci, + const struct pipe_texture *texture) +{ + const struct texture_list *tl; + + for (tl = ci->textures_used; tl; tl = tl->next) { + if (tl->texture == texture) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add the given texture to the context's list of used textures. + */ +static void +add_texture_used(struct sct_context *ci, + struct pipe_texture *texture) +{ + if (!find_texture(ci, texture)) { + /* add to list */ + struct texture_list *tl = CALLOC_STRUCT(texture_list); + if (tl) { + pipe_texture_reference(&tl->texture, texture); + /* insert at head */ + tl->next = ci->textures_used; + ci->textures_used = tl; + } + } +} + + +/** + * Bind a texture to a rendering context. + */ +void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *tex) +{ + struct sct_context *ci = find_context_info(sct, context); + + if (ci->textures[unit] != tex) { + /* put texture on the 'used' list */ + add_texture_used(ci, tex); + /* bind new */ + pipe_texture_reference(&ci->textures[unit], tex); + } +} + + +/** + * Check if the given texture has been used by the rendering context + * since the last call to sct_flush_textures(). + */ +boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture) +{ + const struct sct_context *ci = find_context_info(sct, context); + return find_texture(ci, texture); +} + + +/** + * To be called when the image contents of a texture are changed, such + * as for gl[Copy]TexSubImage(). + * XXX this may not be needed + */ +void +sct_update_texture(struct pipe_texture *tex) +{ + +} + + +/** + * When a scene is flushed/rendered we can release the list of + * used textures. + */ +void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + struct texture_list *tl, *next; + uint i; + + for (tl = ci->textures_used; tl; tl = next) { + next = tl->next; + pipe_texture_release(&tl->texture); + FREE(tl); + } + ci->textures_used = NULL; + + /* put the currently bound textures on the 'used' list */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + add_texture_used(ci, ci->textures[i]); + } +} + + + +void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + /* XXX should we require an unbinding first? */ + { + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) { + remove_context_from_surface(si, context); + } + } + + /* remove context from context_info list */ + { + struct sct_context *ci, *next, *prev = NULL; + for (ci = sct->contexts; ci; ci = next) { + next = ci->next; + if (ci->context == context) { + if (prev) + prev->next = ci->next; + else + sct->contexts = ci->next; + FREE(ci); + } + else { + prev = ci; + } + } + } + +} + + +void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface) +{ + if (1) { + /* debug/sanity: no context should be bound to surface */ + struct sct_context *ci; + uint i; + for (ci = sct->contexts; ci; ci = ci->next) { + for (i = 0; i < MAX_SURFACES; i++) { + assert(ci->surfaces[i] != surface); + } + } + } + + /* remove surface from sct_surface list */ + { + struct sct_surface *si, *next, *prev = NULL; + for (si = sct->surfaces; si; si = next) { + next = si->next; + if (si->surface == surface) { + /* unlink */ + if (prev) + prev->next = si->next; + else + sct->surfaces = si->next; + FREE(si); + } + else { + prev = si; + } + } + } +} diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h new file mode 100644 index 0000000000..cf7c4d3bdf --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Surface/Context Tracking + * + * For some drivers, we need to monitor the binding between contexts and + * surfaces/textures. + * This code may evolve quite a bit... + */ + + +#ifndef SCT_H +#define SCT_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_context; +struct pipe_surface; + +struct sct_context; +struct sct_surface; + + +/** + * Per-device info, basically + */ +struct surface_context_tracker +{ + struct sct_context *contexts; + struct sct_surface *surfaces; +}; + + + +/** + * Simple linked list of contexts + */ +struct sct_context_list +{ + const struct pipe_context *context; + struct sct_context_list *next; +}; + + + +extern void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces); + + +extern void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *texture); + + +extern void +sct_update_texture(struct pipe_texture *tex); + + +extern boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture); + +extern void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surf); + + +extern void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface); + + + +#ifdef __cplusplus +} +#endif + +#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c new file mode 100644 index 0000000000..6227f19962 --- /dev/null +++ b/src/gallium/auxiliary/sct/usage.c @@ -0,0 +1,61 @@ +/* surface / context tracking */ + + +/* + +context A: + render to texture T + +context B: + texture from T + +----------------------- + +flush surface: + which contexts are bound to the surface? + +----------------------- + +glTexSubImage(): + which contexts need to be flushed? + + */ + + +/* + +in MakeCurrent(): + + call sct_bind_surfaces(context, list of surfaces) to update the + dependencies between context and surfaces + + +in SurfaceFlush(), or whatever it is in D3D: + + call sct_get_surface_contexts(surface) to get a list of contexts + which are currently bound to the surface. + + + +in BindTexture(): + + call sct_bind_texture(context, texture) to indicate that the texture + is used in the scene. + + +in glTexSubImage() or RenderToTexture(): + + call sct_is_texture_used(context, texture) to determine if the texture + has been used in the scene, but the scene's not flushed. If TRUE is + returned it means the scene has to be rendered/flushed before the contents + of the texture can be changed. + + +in psb_scene_flush/terminate(): + + call sct_flush_textures(context) to tell the SCT that the textures which + were used in the scene can be released. + + + +*/ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index ac52441400..f2ed9e0353 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler, static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, + boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union tgsi_exec_channel r[8]; @@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); micro_div( &r[0], &r[0], &r[1] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -2007,14 +1984,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -2030,7 +2007,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); break; case TGSI_OPCODE_UP2H: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index a00ff1c2a5..9c883ab704 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -719,7 +719,6 @@ tgsi_build_full_instruction( reg->SrcRegisterExtSwz.NegateY, reg->SrcRegisterExtSwz.NegateZ, reg->SrcRegisterExtSwz.NegateW, - reg->SrcRegisterExtSwz.ExtDivide, prev_token, instruction, header ); @@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void ) src_register_ext_swz.NegateY = 0; src_register_ext_swz.NegateZ = 0; src_register_ext_swz.NegateW = 0; - src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE; src_register_ext_swz.Padding = 0; src_register_ext_swz.Extended = 0; @@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz( assert( negate_y <= 1 ); assert( negate_z <= 1 ); assert( negate_w <= 1 ); - assert( ext_divide <= TGSI_EXTSWIZZLE_ONE ); src_register_ext_swz = tgsi_default_src_register_ext_swz(); src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; @@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz( src_register_ext_swz.NegateY = negate_y; src_register_ext_swz.NegateZ = negate_z; src_register_ext_swz.NegateW = negate_w; - src_register_ext_swz.ExtDivide = ext_divide; prev_token->Extended = 1; instruction_grow( instruction, header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 607860e7fc..80bffc4ae7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 59be14a748..ceb407b884 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -459,7 +459,8 @@ static const char *TGSI_OPCODES[] = "OPCODE_IFC", "OPCODE_BREAKC", "OPCODE_KIL", - "OPCODE_END" + "OPCODE_END", + "OPCODE_TXP" }; static const char *TGSI_OPCODES_SHORT[] = @@ -597,7 +598,8 @@ static const char *TGSI_OPCODES_SHORT[] = "IFC", "BREAKC", "KIL", - "END" + "END", + "TXP" }; static const char *TGSI_SATS[] = @@ -1361,10 +1363,6 @@ dump_instruction_verbose( TXT( "\nNegateW : " ); UID( src->SrcRegisterExtSwz.NegateW ); } - if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) { - TXT( "\nExtDivide : " ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES ); - } if( ignored ) { TXT( "\nPadding : " ); UIX( src->SrcRegisterExtSwz.Padding ); diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 906a46d6b4..2abbe9500e 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,7 +7,9 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ - u_mm.c + u_handle_table.c \ + u_mm.c \ + u_snprintf.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 4717941434..2030214aa7 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,7 +6,9 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_handle_table.c', 'u_mm.c', + 'u_snprintf.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b9607a6ba7..09cabdae25 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -36,14 +36,37 @@ #include <stdlib.h> #endif -#include "pipe/p_debug.h" #include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + + +#ifdef WIN32 +static INLINE void +rpl_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} + +int rpl_vsnprintf(char *, size_t, const char *, va_list); +#endif void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 - EngDebugPrint("Gallium3D: ", (PCHAR)format, ap); +#ifndef WINCE + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation */ + char buf[512 + 1]; + rpl_vsnprintf(buf, sizeof(buf), format, ap); + rpl_EngDebugPrint("%s", buf); +#else + /* TODO: Implement debug print for WINCE */ +#endif #else vfprintf(stderr, format, ap); #endif @@ -59,18 +82,92 @@ void debug_printf(const char *format, ...) } -static INLINE void debug_abort(void) +/* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */ + + +static INLINE void debug_break(void) { -#ifdef WIN32 +#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) + __asm("int3"); +#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) + _asm {int 3}; +#elif defined(WIN32) && !defined(WINCE) EngDebugBreak(); #else abort(); #endif } +#if defined(WIN32) +ULONG_PTR debug_config_file = 0; +void *mapped_config_file = 0; + +enum { + eAssertAbortEn = 0x1, +}; + +/* Check for aborts enabled. */ +static unsigned abort_en() +{ + if (!mapped_config_file) + { + /* Open an 8 byte file for configuration data. */ + mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); + } + /* An value of "0" (ascii) in the configuration file will clear the first 8 bits in the test byte. */ + /* An value of "1" (ascii) in the configuration file will set the first bit in the test byte. */ + /* An value of "2" (ascii) in the configuration file will set the second bit in the test byte. */ + return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; +} +#else /* WIN32 */ +static unsigned abort_en() +{ + return !GETENV("GALLIUM_ABORT_ON_ASSERT"); +} +#endif void debug_assert_fail(const char *expr, const char *file, unsigned line) { debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); - debug_abort(); + if (abort_en()) + { + debug_break(); + } else + { + debug_printf("continuing...\n"); + } +} + + +#define DEBUG_MASK_TABLE_SIZE 256 + + +/** + * Mask hash table. + * + * For now we just take the lower bits of the key, and do no attempt to solve + * collisions. Use a proper hash table when we have dozens of drivers. + */ +static uint32_t debug_mask_table[DEBUG_MASK_TABLE_SIZE]; + + +void debug_mask_set(uint32_t uuid, uint32_t mask) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + debug_mask_table[hash] = mask; +} + + +uint32_t debug_mask_get(uint32_t uuid) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + return debug_mask_table[hash]; +} + + +void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_list ap) +{ + uint32_t mask = debug_mask_get(uuid); + if(mask & what) + debug_vprintf(format, ap); } diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index 318b6d11a6..fdc80a13b3 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -52,44 +52,50 @@ pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) return FALSE; } +#ifdef __cplusplus +extern "C" { +#endif -extern void +void pipe_get_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *p, int dst_stride); -extern void +void pipe_put_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *p, int src_stride); -extern void +void pipe_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p); -extern void +void pipe_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p); -extern void +void pipe_get_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z); -extern void +void pipe_put_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c new file mode 100644 index 0000000000..d9f2f8fc28 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -0,0 +1,506 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Based on the work of Eric Anholt <anholt@FreeBSD.org> + */ + +/* FIXME: clean this entire file up */ + +#include "u_cpu_detect.h" + +#ifdef __linux__ +#define OS_LINUX +#endif +#ifdef WIN32 +#define OS_WIN32 +#endif + +#if defined(ARCH_POWERPC) +#if defined(OS_DARWIN) +#include <sys/sysctl.h> +#else +#include <signal.h> +#include <setjmp.h> +#endif +#endif + +#if defined(OS_NETBSD) || defined(OS_OPENBSD) +#include <sys/param.h> +#include <sys/sysctl.h> +#include <machine/cpu.h> +#endif + +#if defined(OS_FREEBSD) +#include <sys/types.h> +#include <sys/sysctl.h> +#endif + +#if defined(OS_LINUX) +#include <signal.h> +#endif + +#if defined(OS_WIN32) +#include <windows.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + + +static struct cpu_detect_caps __cpu_detect_caps; +static int __cpu_detect_initialized = 0; + +static int has_cpuid(void); +static int cpuid(unsigned int ax, unsigned int *p); + +/* The sigill handlers */ +#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */ +#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) +static void sigill_handler_sse(int signal, struct sigcontext sc) +{ + /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" + * instructions are 3 bytes long. We must increment the instruction + * pointer manually to avoid repeated execution of the offending + * instruction. + * + * If the SIGILL is caused by a divide-by-zero when unmasked + * exceptions aren't supported, the SIMD FPU status and control + * word will be restored at the end of the test, so we don't need + * to worry about doing it here. Besides, we may not be able to... + */ + sc.eip += 3; + + __cpu_detect_caps.hasSSE=0; +} + +static void sigfpe_handler_sse(int signal, struct sigcontext sc) +{ + if (sc.fpstate->magic != 0xffff) { + /* Our signal context has the extended FPU state, so reset the + * divide-by-zero exception mask and clear the divide-by-zero + * exception bit. + */ + sc.fpstate->mxcsr |= 0x00000200; + sc.fpstate->mxcsr &= 0xfffffffb; + } else { + /* If we ever get here, we're completely hosed. + */ + } +} +#endif +#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ + +#if defined(OS_WIN32) +LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep) +{ + if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ + ep->ContextRecord->Eip +=3; + __cpu_detect_caps.hasSSE=0; + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; +} +#endif /* OS_WIN32 */ + + +#if defined(ARCH_POWERPC) && !defined(OS_DARWIN) +static sigjmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; + +static void sigill_handler (int sig); + +static void sigill_handler (int sig) +{ + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + __lv_powerpc_canjump = 0; + siglongjmp(__lv_powerpc_jmpbuf, 1); +} + +static void check_os_altivec_support(void) +{ +#if defined(OS_DARWIN) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + __cpu_detect_caps.hasAltiVec = 1; + } + } +#else /* !OS_DARWIN */ + /* no Darwin, do it the brute-force way */ + /* this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + __cpu_detect_caps.hasAltiVec = 1; + } +#endif +} +#endif + +/* If we're running on a processor that can do SSE, let's see if we + * are allowed to or not. This will catch 2.4.0 or later kernels that + * haven't been configured for a Pentium III but are running on one, + * and RedHat patched 2.2 kernels that have broken exception handling + * support for user space apps that do SSE. + */ +static void check_os_katmai_support(void) +{ +#if defined(ARCH_X86) +#if defined(OS_FREEBSD) + int has_sse=0, ret; + int len = sizeof (has_sse); + + ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); + if (ret || !has_sse) + __cpu_detect_caps.hasSSE=0; + +#elif defined(OS_NETBSD) || defined(OS_OPENBSD) + int has_sse, has_sse2, ret, mib[2]; + int varlen; + + mib[0] = CTL_MACHDEP; + mib[1] = CPU_SSE; + varlen = sizeof (has_sse); + + ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); + if (ret < 0 || !has_sse) { + __cpu_detect_caps.hasSSE = 0; + } else { + __cpu_detect_caps.hasSSE = 1; + } + + mib[1] = CPU_SSE2; + varlen = sizeof (has_sse2); + ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); + if (ret < 0 || !has_sse2) { + __cpu_detect_caps.hasSSE2 = 0; + } else { + __cpu_detect_caps.hasSSE2 = 1; + } + __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */ + +#elif defined(OS_WIN32) + LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; + if (__cpu_detect_caps.hasSSE) { + exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); + __asm __volatile ("xorps %xmm0, %xmm0"); + SetUnhandledExceptionFilter(exc_fil); + } +#elif defined(OS_LINUX) + struct sigaction saved_sigill; + struct sigaction saved_sigfpe; + + /* Save the original signal handlers. + */ + sigaction(SIGILL, NULL, &saved_sigill); + sigaction(SIGFPE, NULL, &saved_sigfpe); + + signal(SIGILL, (void (*)(int))sigill_handler_sse); + signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); + + /* Emulate test for OSFXSR in CR4. The OS will set this bit if it + * supports the extended FPU save and restore required for SSE. If + * we execute an SSE instruction on a PIII and get a SIGILL, the OS + * doesn't support Streaming SIMD Exceptions, even if the processor + * does. + */ + if (__cpu_detect_caps.hasSSE) { + __asm __volatile ("xorps %xmm1, %xmm0"); + } + + /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if + * it supports unmasked SIMD FPU exceptions. If we unmask the + * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS + * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE + * as expected, we're okay but we need to clean up after it. + * + * Are we being too stringent in our requirement that the OS support + * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by + * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 + * doesn't even support them. We at least know the user-space SSE + * support is good in kernels that do support unmasked exceptions, + * and therefore to be safe I'm going to leave this test in here. + */ + if (__cpu_detect_caps.hasSSE) { + // test_os_katmai_exception_support(); + } + + /* Restore the original signal handlers. + */ + sigaction(SIGILL, &saved_sigill, NULL); + sigaction(SIGFPE, &saved_sigfpe, NULL); + +#else + /* We can't use POSIX signal handling to test the availability of + * SSE, so we disable it by default. + */ + __cpu_detect_caps.hasSSE = 0; +#endif /* __linux__ */ +#endif +} + + +static int has_cpuid(void) +{ +#if defined(ARCH_X86) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + return 0; +#endif +} + +static int cpuid(unsigned int ax, unsigned int *p) +{ +#if defined(ARCH_X86) + unsigned int flags; + + __asm __volatile + ("movl %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%esi" + : "=a" (p[0]), "=S" (p[1]), + "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); + + return 0; +#else + return -1; +#endif +} + +void cpu_detect_initialize() +{ + unsigned int regs[4]; + unsigned int regs2[4]; + + int mib[2], ncpu; + int len; + + memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps)); + + /* Check for arch type */ +#if defined(ARCH_MIPS) + __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS; +#elif defined(ARCH_ALPHA) + __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA; +#elif defined(ARCH_SPARC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC; +#elif defined(ARCH_X86) + __cpu_detect_caps.type = CPU_DETECT_TYPE_X86; +#elif defined(ARCH_POWERPC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC; +#else + __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER; +#endif + + /* Count the number of CPUs in system */ +#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN) + __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (__cpu_detect_caps.nrcpu == -1) + __cpu_detect_caps.nrcpu = 1; + +#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD) + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + __cpu_detect_caps.nrcpu = ncpu; + +#else + __cpu_detect_caps.nrcpu = 1; +#endif + +#if defined(ARCH_X86) + /* No cpuid, old 486 or lower */ + if (has_cpuid() == 0) + return; + + __cpu_detect_caps.cacheline = 32; + + /* Get max cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf; + if (__cpu_detect_caps.x86cpuType == 0xf) + __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ + __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ + __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ + __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */ + __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ + __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */ + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + __cpu_detect_caps.cacheline = cacheline; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ + __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ + __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30; + } + + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + __cpu_detect_caps.cacheline = regs2[2] & 0xFF; + } + + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD) + if (__cpu_detect_caps.hasSSE) + check_os_katmai_support(); + + if (!__cpu_detect_caps.hasSSE) { + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; + } +#else + __cpu_detect_caps.hasSSE = 0; + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; +#endif +#endif /* ARCH_X86 */ + +#if defined(ARCH_POWERPC) + check_os_altivec_support(); +#endif /* ARCH_POWERPC */ + + __cpu_detect_initialized = 1; +} + +struct cpu_detect_caps *cpu_detect_get_caps() +{ + return &__cpu_detect_caps; +} + +/* The getters and setters for feature flags */ +int cpu_detect_get_tsc() +{ + return __cpu_detect_caps.hasTSC; +} + +int cpu_detect_get_mmx() +{ + return __cpu_detect_caps.hasMMX; +} + +int cpu_detect_get_mmx2() +{ + return __cpu_detect_caps.hasMMX2; +} + +int cpu_detect_get_sse() +{ + return __cpu_detect_caps.hasSSE; +} + +int cpu_detect_get_sse2() +{ + return __cpu_detect_caps.hasSSE2; +} + +int cpu_detect_get_sse3() +{ + return __cpu_detect_caps.hasSSE3; +} + +int cpu_detect_get_ssse3() +{ + return __cpu_detect_caps.hasSSSE3; +} + +int cpu_detect_get_3dnow() +{ + return __cpu_detect_caps.has3DNow; +} + +int cpu_detect_get_3dnow2() +{ + return __cpu_detect_caps.has3DNowExt; +} + +int cpu_detect_get_altivec() +{ + return __cpu_detect_caps.hasAltiVec; +} + diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h new file mode 100644 index 0000000000..1612d49286 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + ***************************************************************************/ + +/* + * Based on the work of Eric Anholt <anholt@FreeBSD.org> + */ + +#ifndef _CPU_DETECT_H +#define _CPU_DETECT_H + +typedef enum { + CPU_DETECT_TYPE_MIPS, + CPU_DETECT_TYPE_ALPHA, + CPU_DETECT_TYPE_SPARC, + CPU_DETECT_TYPE_X86, + CPU_DETECT_TYPE_POWERPC, + CPU_DETECT_TYPE_OTHER +} cpu_detect_type; + +struct cpu_detect_caps { + cpu_detect_type type; + int nrcpu; + + /* Feature flags */ + int x86cpuType; + int cacheline; + + int hasTSC; + int hasMMX; + int hasMMX2; + int hasSSE; + int hasSSE2; + int hasSSE3; + int hasSSSE3; + int has3DNow; + int has3DNowExt; + int hasAltiVec; +}; + +/* prototypes */ +void cpu_detect_initialize(void); +struct cpu_detect_caps *cpu_detect_get_caps(void); + +int cpu_detect_get_tsc(void); +int cpu_detect_get_mmx(void); +int cpu_detect_get_mmx2(void); +int cpu_detect_get_sse(void); +int cpu_detect_get_sse2(void); +int cpu_detect_get_sse3(void); +int cpu_detect_get_ssse3(void); +int cpu_detect_get_3dnow(void); +int cpu_detect_get_3dnow2(void); +int cpu_detect_get_altivec(void); + +#endif /* _CPU_DETECT_H */ diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c new file mode 100644 index 0000000000..8a298f7c41 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table implementation. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "u_handle_table.h" + + +#define HANDLE_TABLE_INITIAL_SIZE 16 + + +struct handle_table +{ + /** Object array. Empty handles have a null object */ + void **objects; + + /** Number of objects the handle can currently hold */ + unsigned size; + /** Number of consecutive objects allocated at the start of the table */ + unsigned filled; + + /** Optional object destructor */ + void (*destroy)(void *object); +}; + + +struct handle_table * +handle_table_create(void) +{ + struct handle_table *ht; + + ht = MALLOC_STRUCT(handle_table); + if(!ht) + return NULL; + + ht->objects = (void **)CALLOC(HANDLE_TABLE_INITIAL_SIZE, sizeof(void *)); + if(!ht->objects) { + FREE(ht); + return NULL; + } + + ht->size = HANDLE_TABLE_INITIAL_SIZE; + ht->filled = 0; + + ht->destroy = NULL; + + return ht; +} + + +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)) +{ + assert(ht); + ht->destroy = destroy; +} + + +unsigned +handle_table_add(struct handle_table *ht, + void *object) +{ + unsigned index; + unsigned handle; + + assert(ht); + assert(object); + if(!object) + return 0; + + /* linear search for an empty handle */ + while(ht->filled < ht->size) { + if(!ht->objects[ht->filled]) + break; + ++ht->filled; + } + + /* grow the table */ + if(ht->filled == ht->size) { + unsigned new_size; + void **new_objects; + + new_size = ht->size*2; + assert(new_size); + + new_objects = (void **)REALLOC((void *)ht->objects, + ht->size*sizeof(void *), + new_size*sizeof(void *)); + if(!new_objects) + return 0; + + memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); + + ht->size = new_size; + ht->objects = new_objects; + } + + index = ht->filled; + + handle = index + 1; + + /* check integer overflow */ + if(!handle) + return 0; + + assert(!ht->objects[index]); + ht->objects[index] = object; + ++ht->filled; + + return handle; +} + + +void * +handle_table_get(struct handle_table *ht, + unsigned handle) +{ + void *object; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return NULL; + + object = ht->objects[handle - 1]; + assert(object); + + return object; +} + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle) +{ + void *object; + unsigned index; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return; + + index = handle - 1; + object = ht->objects[index]; + assert(object); + + if(object && ht->destroy) + ht->destroy(object); + + ht->objects[index] = NULL; + if(index < ht->filled) + ht->filled = index; +} + + +void +handle_table_destroy(struct handle_table *ht) +{ + unsigned index; + assert(ht); + + if(ht->destroy) + for(index = 0; index < ht->size; ++index) + if(ht->objects[index]) + ht->destroy(ht->objects[index]); + + FREE(ht->objects); + FREE(ht); +} + diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h new file mode 100644 index 0000000000..51fc273865 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_HANDLE_TABLE_H_ +#define U_HANDLE_TABLE_H_ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Abstract data type to map integer handles to objects. + */ +struct handle_table; + + +struct handle_table * +handle_table_create(void); + + +/** + * Set an optional destructor callback. + * + * If set, it will be called during handle_table_remove and + * handle_table_destroy calls. + */ +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)); + + +/** + * Add a new object. + * + * Returns a zero handle on failure (out of memory). + */ +unsigned +handle_table_add(struct handle_table *ht, + void *object); + +/** + * Fetch an existing object. + * + * Returns NULL for an invalid handle. + */ +void * +handle_table_get(struct handle_table *ht, + unsigned handle); + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle); + + +void +handle_table_destroy(struct handle_table *ht); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HANDLE_TABLE_H_ */ diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c new file mode 100644 index 0000000000..61c20b48f7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -0,0 +1,1478 @@ +/* + * Copyright (c) 1995 Patrick Powell. + * + * This code is based on code written by Patrick Powell <papowell@astart.com>. + * It may be used for any purpose as long as this notice remains intact on all + * source code distributions. + */ + +/* + * Copyright (c) 2008 Holger Weiss. + * + * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>. + * My changes to the code may freely be used, modified and/or redistributed for + * any purpose. It would be nice if additions and fixes to this file (including + * trivial code cleanups) would be sent back in order to let me include them in + * the version available at <http://www.jhweiss.de/software/snprintf.html>. + * However, this is not a requirement for using or redistributing (possibly + * modified) versions of this file, nor is leaving this notice intact mandatory. + */ + +/* + * History + * + * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1: + * + * Fixed the detection of infinite floating point values on IRIX (and + * possibly other systems) and applied another few minor cleanups. + * + * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0: + * + * Added a lot of new features, fixed many bugs, and incorporated various + * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery + * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller + * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH + * projects. The additions include: support the "e", "E", "g", "G", and + * "F" conversion specifiers (and use conversion style "f" or "F" for the + * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j", + * "t", and "z" length modifiers; support the "#" flag and the (non-C99) + * "'" flag; use localeconv(3) (if available) to get both the current + * locale's decimal point character and the separator between groups of + * digits; fix the handling of various corner cases of field width and + * precision specifications; fix various floating point conversion bugs; + * handle infinite and NaN floating point values; don't attempt to write to + * the output buffer (which may be NULL) if a size of zero was specified; + * check for integer overflow of the field width, precision, and return + * values and during the floating point conversion; use the OUTCHAR() macro + * instead of a function for better performance; provide asprintf(3) and + * vasprintf(3) functions; add new test cases. The replacement functions + * have been renamed to use an "rpl_" prefix, the function calls in the + * main project (and in this file) must be redefined accordingly for each + * replacement function which is needed (by using Autoconf or other means). + * Various other minor improvements have been applied and the coding style + * was cleaned up for consistency. + * + * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13: + * + * C99 compliant snprintf(3) and vsnprintf(3) functions return the number + * of characters that would have been written to a sufficiently sized + * buffer (excluding the '\0'). The original code simply returned the + * length of the resulting output string, so that's been fixed. + * + * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8: + * + * The original code assumed that both snprintf(3) and vsnprintf(3) were + * missing. Some systems only have snprintf(3) but not vsnprintf(3), so + * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF. + * + * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i: + * + * The PGP code was using unsigned hexadecimal formats. Unfortunately, + * unsigned formats simply didn't work. + * + * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1: + * + * Ok, added some minimal floating point support, which means this probably + * requires libm on most operating systems. Don't yet support the exponent + * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just + * wasn't being exercised in ways which showed it, so that's been fixed. + * Also, formatted the code to Mutt conventions, and removed dead code left + * over from the original. Also, there is now a builtin-test, run with: + * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf + * + * 2996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43: + * + * This was ugly. It is still ugly. I opted out of floating point + * numbers, but the formatter understands just about everything from the + * normal C string format, at least as far as I can tell from the Solaris + * 2.5 printf(3S) man page. + */ + +/* + * ToDo + * + * - Add wide character support. + * - Add support for "%a" and "%A" conversions. + * - Create test routines which predefine the expected results. Our test cases + * usually expose bugs in system implementations rather than in ours :-) + */ + +/* + * Usage + * + * 1) The following preprocessor macros should be defined to 1 if the feature or + * file in question is available on the target system (by using Autoconf or + * other means), though basic functionality should be available as long as + * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly: + * + * HAVE_VSNPRINTF + * HAVE_SNPRINTF + * HAVE_VASPRINTF + * HAVE_ASPRINTF + * HAVE_STDARG_H + * HAVE_STDDEF_H + * HAVE_STDINT_H + * HAVE_STDLIB_H + * HAVE_INTTYPES_H + * HAVE_LOCALE_H + * HAVE_LOCALECONV + * HAVE_LCONV_DECIMAL_POINT + * HAVE_LCONV_THOUSANDS_SEP + * HAVE_LONG_DOUBLE + * HAVE_LONG_LONG_INT + * HAVE_UNSIGNED_LONG_LONG_INT + * HAVE_INTMAX_T + * HAVE_UINTMAX_T + * HAVE_UINTPTR_T + * HAVE_PTRDIFF_T + * HAVE_VA_COPY + * HAVE___VA_COPY + * + * 2) The calls to the functions which should be replaced must be redefined + * throughout the project files (by using Autoconf or other means): + * + * #define vsnprintf rpl_vsnprintf + * #define snprintf rpl_snprintf + * #define vasprintf rpl_vasprintf + * #define asprintf rpl_asprintf + * + * 3) The required replacement functions should be declared in some header file + * included throughout the project files: + * + * #if HAVE_CONFIG_H + * #include <config.h> + * #endif + * #if HAVE_STDARG_H + * #include <stdarg.h> + * #if !HAVE_VSNPRINTF + * int rpl_vsnprintf(char *, size_t, const char *, va_list); + * #endif + * #if !HAVE_SNPRINTF + * int rpl_snprintf(char *, size_t, const char *, ...); + * #endif + * #if !HAVE_VASPRINTF + * int rpl_vasprintf(char **, const char *, va_list); + * #endif + * #if !HAVE_ASPRINTF + * int rpl_asprintf(char **, const char *, ...); + * #endif + * #endif + * + * Autoconf macros for handling step 1 and step 2 are available at + * <http://www.jhweiss.de/software/snprintf.html>. + */ + +#if HAVE_CONFIG_H +#include <config.h> +#else +#ifdef WIN32 +#define vsnprintf rpl_vsnprintf +#define snprintf rpl_snprintf +#define HAVE_VSNPRINTF 0 +#define HAVE_SNPRINTF 0 +#define HAVE_VASPRINTF 1 /* not needed */ +#define HAVE_ASPRINTF 1 /* not needed */ +#define HAVE_STDARG_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 0 +#define HAVE_STDLIB_H 1 +#define HAVE_INTTYPES_H 0 +#define HAVE_LOCALE_H 0 +#define HAVE_LOCALECONV 0 +#define HAVE_LCONV_DECIMAL_POINT 0 +#define HAVE_LCONV_THOUSANDS_SEP 0 +#define HAVE_LONG_DOUBLE 0 +#define HAVE_LONG_LONG_INT 1 +#define HAVE_UNSIGNED_LONG_LONG_INT 1 +#define HAVE_INTMAX_T 0 +#define HAVE_UINTMAX_T 0 +#define HAVE_UINTPTR_T 1 +#define HAVE_PTRDIFF_T 1 +#define HAVE_VA_COPY 0 +#define HAVE___VA_COPY 0 +#else +#define HAVE_VSNPRINTF 1 +#define HAVE_SNPRINTF 1 +#define HAVE_VASPRINTF 1 +#define HAVE_ASPRINTF 1 +#endif +#endif /* HAVE_CONFIG_H */ + +#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF +#include <stdio.h> /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */ +#ifdef VA_START +#undef VA_START +#endif /* defined(VA_START) */ +#ifdef VA_SHIFT +#undef VA_SHIFT +#endif /* defined(VA_SHIFT) */ +#if HAVE_STDARG_H +#include <stdarg.h> +#define VA_START(ap, last) va_start(ap, last) +#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */ +#else /* Assume <varargs.h> is available. */ +#include <varargs.h> +#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */ +#define VA_SHIFT(ap, value, type) value = va_arg(ap, type) +#endif /* HAVE_STDARG_H */ + +#if !HAVE_VASPRINTF +#if HAVE_STDLIB_H +#include <stdlib.h> /* For malloc(3). */ +#endif /* HAVE_STDLIB_H */ +#ifdef VA_COPY +#undef VA_COPY +#endif /* defined(VA_COPY) */ +#ifdef VA_END_COPY +#undef VA_END_COPY +#endif /* defined(VA_END_COPY) */ +#if HAVE_VA_COPY +#define VA_COPY(dest, src) va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#elif HAVE___VA_COPY +#define VA_COPY(dest, src) __va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#else +#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list)) +#define VA_END_COPY(ap) /* No-op. */ +#define NEED_MYMEMCPY 1 +static void *mymemcpy(void *, void *, size_t); +#endif /* HAVE_VA_COPY */ +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_VSNPRINTF +#include <limits.h> /* For *_MAX. */ +#if HAVE_INTTYPES_H +#include <inttypes.h> /* For intmax_t (if not defined in <stdint.h>). */ +#endif /* HAVE_INTTYPES_H */ +#if HAVE_LOCALE_H +#include <locale.h> /* For localeconv(3). */ +#endif /* HAVE_LOCALE_H */ +#if HAVE_STDDEF_H +#include <stddef.h> /* For ptrdiff_t. */ +#endif /* HAVE_STDDEF_H */ +#if HAVE_STDINT_H +#include <stdint.h> /* For intmax_t. */ +#endif /* HAVE_STDINT_H */ + +/* Support for unsigned long long int. We may also need ULLONG_MAX. */ +#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */ +#ifdef UINT_MAX +#define ULONG_MAX UINT_MAX +#else +#define ULONG_MAX INT_MAX +#endif /* defined(UINT_MAX) */ +#endif /* !defined(ULONG_MAX) */ +#ifdef ULLONG +#undef ULLONG +#endif /* defined(ULLONG) */ +#if HAVE_UNSIGNED_LONG_LONG_INT +#define ULLONG unsigned long long int +#ifndef ULLONG_MAX +#define ULLONG_MAX ULONG_MAX +#endif /* !defined(ULLONG_MAX) */ +#else +#define ULLONG unsigned long int +#ifdef ULLONG_MAX +#undef ULLONG_MAX +#endif /* defined(ULLONG_MAX) */ +#define ULLONG_MAX ULONG_MAX +#endif /* HAVE_LONG_LONG_INT */ + +/* Support for uintmax_t. We also need UINTMAX_MAX. */ +#ifdef UINTMAX_T +#undef UINTMAX_T +#endif /* defined(UINTMAX_T) */ +#if HAVE_UINTMAX_T || defined(uintmax_t) +#define UINTMAX_T uintmax_t +#ifndef UINTMAX_MAX +#define UINTMAX_MAX ULLONG_MAX +#endif /* !defined(UINTMAX_MAX) */ +#else +#define UINTMAX_T ULLONG +#ifdef UINTMAX_MAX +#undef UINTMAX_MAX +#endif /* defined(UINTMAX_MAX) */ +#define UINTMAX_MAX ULLONG_MAX +#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */ + +/* Support for long double. */ +#ifndef LDOUBLE +#if HAVE_LONG_DOUBLE +#define LDOUBLE long double +#else +#define LDOUBLE double +#endif /* HAVE_LONG_DOUBLE */ +#endif /* !defined(LDOUBLE) */ + +/* Support for long long int. */ +#ifndef LLONG +#if HAVE_LONG_LONG_INT +#define LLONG long long int +#else +#define LLONG long int +#endif /* HAVE_LONG_LONG_INT */ +#endif /* !defined(LLONG) */ + +/* Support for intmax_t. */ +#ifndef INTMAX_T +#if HAVE_INTMAX_T || defined(intmax_t) +#define INTMAX_T intmax_t +#else +#define INTMAX_T LLONG +#endif /* HAVE_INTMAX_T || defined(intmax_t) */ +#endif /* !defined(INTMAX_T) */ + +/* Support for uintptr_t. */ +#ifndef UINTPTR_T +#if HAVE_UINTPTR_T || defined(uintptr_t) +#define UINTPTR_T uintptr_t +#else +#define UINTPTR_T unsigned long int +#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ +#endif /* !defined(UINTPTR_T) */ + +/* Support for ptrdiff_t. */ +#ifndef PTRDIFF_T +#if HAVE_PTRDIFF_T || defined(ptrdiff_t) +#define PTRDIFF_T ptrdiff_t +#else +#define PTRDIFF_T long int +#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */ +#endif /* !defined(PTRDIFF_T) */ + +/* + * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99: + * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an + * unsigned type if necessary. This should work just fine in practice. + */ +#ifndef UPTRDIFF_T +#define UPTRDIFF_T PTRDIFF_T +#endif /* !defined(UPTRDIFF_T) */ + +/* + * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7). + * However, we'll simply use size_t and convert it to a signed type if + * necessary. This should work just fine in practice. + */ +#ifndef SSIZE_T +#define SSIZE_T size_t +#endif /* !defined(SSIZE_T) */ + +/* Either ERANGE or E2BIG should be available everywhere. */ +#ifndef ERANGE +#define ERANGE E2BIG +#endif /* !defined(ERANGE) */ +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif /* !defined(EOVERFLOW) */ + +/* + * Buffer size to hold the octal string representation of UINT128_MAX without + * nul-termination ("3777777777777777777777777777777777777777777"). + */ +#ifdef MAX_CONVERT_LENGTH +#undef MAX_CONVERT_LENGTH +#endif /* defined(MAX_CONVERT_LENGTH) */ +#define MAX_CONVERT_LENGTH 43 + +/* Format read states. */ +#define PRINT_S_DEFAULT 0 +#define PRINT_S_FLAGS 1 +#define PRINT_S_WIDTH 2 +#define PRINT_S_DOT 3 +#define PRINT_S_PRECISION 4 +#define PRINT_S_MOD 5 +#define PRINT_S_CONV 6 + +/* Format flags. */ +#define PRINT_F_MINUS (1 << 0) +#define PRINT_F_PLUS (1 << 1) +#define PRINT_F_SPACE (1 << 2) +#define PRINT_F_NUM (1 << 3) +#define PRINT_F_ZERO (1 << 4) +#define PRINT_F_QUOTE (1 << 5) +#define PRINT_F_UP (1 << 6) +#define PRINT_F_UNSIGNED (1 << 7) +#define PRINT_F_TYPE_G (1 << 8) +#define PRINT_F_TYPE_E (1 << 9) + +/* Conversion flags. */ +#define PRINT_C_CHAR 1 +#define PRINT_C_SHORT 2 +#define PRINT_C_LONG 3 +#define PRINT_C_LLONG 4 +#define PRINT_C_LDOUBLE 5 +#define PRINT_C_SIZE 6 +#define PRINT_C_PTRDIFF 7 +#define PRINT_C_INTMAX 8 + +#ifndef MAX +#define MAX(x, y) ((x >= y) ? x : y) +#endif /* !defined(MAX) */ +#ifndef CHARTOINT +#define CHARTOINT(ch) (ch - '0') +#endif /* !defined(CHARTOINT) */ +#ifndef ISDIGIT +#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9') +#endif /* !defined(ISDIGIT) */ +#ifndef ISNAN +#define ISNAN(x) (x != x) +#endif /* !defined(ISNAN) */ +#ifndef ISINF +#define ISINF(x) (x != 0.0 && x + x == x) +#endif /* !defined(ISINF) */ + +#ifdef OUTCHAR +#undef OUTCHAR +#endif /* defined(OUTCHAR) */ +#define OUTCHAR(str, len, size, ch) \ +do { \ + if (len + 1 < size) \ + str[len] = ch; \ + (len)++; \ +} while (/* CONSTCOND */ 0) + +static void fmtstr(char *, size_t *, size_t, const char *, int, int, int); +static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int); +static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *); +static void printsep(char *, size_t *, size_t); +static int getnumsep(int); +static int getexponent(LDOUBLE); +static int convert(UINTMAX_T, char *, size_t, int, int); +static UINTMAX_T cast(LDOUBLE); +static UINTMAX_T myround(LDOUBLE); +static LDOUBLE mypow10(int); + +int +rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + LDOUBLE fvalue; + INTMAX_T value; + unsigned char cvalue; + const char *strvalue; + INTMAX_T *intmaxptr; + PTRDIFF_T *ptrdiffptr; + SSIZE_T *sizeptr; + LLONG *llongptr; + long int *longptr; + int *intptr; + short int *shortptr; + signed char *charptr; + size_t len = 0; + int overflow = 0; + int base = 0; + int cflags = 0; + int flags = 0; + int width = 0; + int precision = -1; + int state = PRINT_S_DEFAULT; + char ch = *format++; + + /* + * C99 says: "If `n' is zero, nothing is written, and `s' may be a null + * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer + * even if a size larger than zero was specified. At least NetBSD's + * snprintf(3) does the same, as well as other versions of this file. + * (Though some of these versions will write to a non-NULL buffer even + * if a size of zero was specified, which violates the standard.) + */ + if (str == NULL && size != 0) + size = 0; + + while (ch != '\0') + switch (state) { + case PRINT_S_DEFAULT: + if (ch == '%') + state = PRINT_S_FLAGS; + else + OUTCHAR(str, len, size, ch); + ch = *format++; + break; + case PRINT_S_FLAGS: + switch (ch) { + case '-': + flags |= PRINT_F_MINUS; + ch = *format++; + break; + case '+': + flags |= PRINT_F_PLUS; + ch = *format++; + break; + case ' ': + flags |= PRINT_F_SPACE; + ch = *format++; + break; + case '#': + flags |= PRINT_F_NUM; + ch = *format++; + break; + case '0': + flags |= PRINT_F_ZERO; + ch = *format++; + break; + case '\'': /* SUSv2 flag (not in C99). */ + flags |= PRINT_F_QUOTE; + ch = *format++; + break; + default: + state = PRINT_S_WIDTH; + break; + } + break; + case PRINT_S_WIDTH: + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (width > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + width = 10 * width + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative field width argument is + * taken as a `-' flag followed by a positive + * field width." (7.19.6.1, 5) + */ + if ((width = va_arg(args, int)) < 0) { + flags |= PRINT_F_MINUS; + width = -width; + } + ch = *format++; + state = PRINT_S_DOT; + } else + state = PRINT_S_DOT; + break; + case PRINT_S_DOT: + if (ch == '.') { + state = PRINT_S_PRECISION; + ch = *format++; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_PRECISION: + if (precision == -1) + precision = 0; + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (precision > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + precision = 10 * precision + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative precision argument is + * taken as if the precision were omitted." + * (7.19.6.1, 5) + */ + if ((precision = va_arg(args, int)) < 0) + precision = -1; + ch = *format++; + state = PRINT_S_MOD; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_MOD: + switch (ch) { + case 'h': + ch = *format++; + if (ch == 'h') { /* It's a char. */ + ch = *format++; + cflags = PRINT_C_CHAR; + } else + cflags = PRINT_C_SHORT; + break; + case 'l': + ch = *format++; + if (ch == 'l') { /* It's a long long. */ + ch = *format++; + cflags = PRINT_C_LLONG; + } else + cflags = PRINT_C_LONG; + break; + case 'L': + cflags = PRINT_C_LDOUBLE; + ch = *format++; + break; + case 'j': + cflags = PRINT_C_INTMAX; + ch = *format++; + break; + case 't': + cflags = PRINT_C_PTRDIFF; + ch = *format++; + break; + case 'z': + cflags = PRINT_C_SIZE; + ch = *format++; + break; + } + state = PRINT_S_CONV; + break; + case PRINT_S_CONV: + switch (ch) { + case 'd': + /* FALLTHROUGH */ + case 'i': + switch (cflags) { + case PRINT_C_CHAR: + value = (signed char)va_arg(args, int); + break; + case PRINT_C_SHORT: + value = (short int)va_arg(args, int); + break; + case PRINT_C_LONG: + value = va_arg(args, long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, LLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, SSIZE_T); + break; + case PRINT_C_INTMAX: + value = va_arg(args, INTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, PTRDIFF_T); + break; + default: + value = va_arg(args, int); + break; + } + fmtint(str, &len, size, value, 10, width, + precision, flags); + break; + case 'X': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'x': + base = 16; + /* FALLTHROUGH */ + case 'o': + if (base == 0) + base = 8; + /* FALLTHROUGH */ + case 'u': + if (base == 0) + base = 10; + flags |= PRINT_F_UNSIGNED; + switch (cflags) { + case PRINT_C_CHAR: + value = (unsigned char)va_arg(args, + unsigned int); + break; + case PRINT_C_SHORT: + value = (unsigned short int)va_arg(args, + unsigned int); + break; + case PRINT_C_LONG: + value = va_arg(args, unsigned long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, ULLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, size_t); + break; + case PRINT_C_INTMAX: + value = va_arg(args, UINTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, UPTRDIFF_T); + break; + default: + value = va_arg(args, unsigned int); + break; + } + fmtint(str, &len, size, value, base, width, + precision, flags); + break; + case 'A': + /* Not yet supported, we'll use "%F". */ + /* FALLTHROUGH */ + case 'F': + flags |= PRINT_F_UP; + case 'a': + /* Not yet supported, we'll use "%f". */ + /* FALLTHROUGH */ + case 'f': + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'E': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'e': + flags |= PRINT_F_TYPE_E; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'G': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'g': + flags |= PRINT_F_TYPE_G; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + /* + * If the precision is zero, it is treated as + * one (cf. C99: 7.19.6.1, 8). + */ + if (precision == 0) + precision = 1; + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'c': + cvalue = (unsigned char)va_arg(args, int); + OUTCHAR(str, len, size, cvalue); + break; + case 's': + strvalue = va_arg(args, char *); + fmtstr(str, &len, size, strvalue, width, + precision, flags); + break; + case 'p': + /* + * C99 says: "The value of the pointer is + * converted to a sequence of printing + * characters, in an implementation-defined + * manner." (C99: 7.19.6.1, 8) + */ + if ((strvalue = va_arg(args, void *)) == NULL) + /* + * We use the glibc format. BSD prints + * "0x0", SysV "0". + */ + fmtstr(str, &len, size, "(nil)", width, + -1, flags); + else { + /* + * We use the BSD/glibc format. SysV + * omits the "0x" prefix (which we emit + * using the PRINT_F_NUM flag). + */ + flags |= PRINT_F_NUM; + flags |= PRINT_F_UNSIGNED; + fmtint(str, &len, size, + (UINTPTR_T)strvalue, 16, width, + precision, flags); + } + break; + case 'n': + switch (cflags) { + case PRINT_C_CHAR: + charptr = va_arg(args, signed char *); + *charptr = len; + break; + case PRINT_C_SHORT: + shortptr = va_arg(args, short int *); + *shortptr = len; + break; + case PRINT_C_LONG: + longptr = va_arg(args, long int *); + *longptr = len; + break; + case PRINT_C_LLONG: + llongptr = va_arg(args, LLONG *); + *llongptr = len; + break; + case PRINT_C_SIZE: + /* + * C99 says that with the "z" length + * modifier, "a following `n' conversion + * specifier applies to a pointer to a + * signed integer type corresponding to + * size_t argument." (7.19.6.1, 7) + */ + sizeptr = va_arg(args, SSIZE_T *); + *sizeptr = len; + break; + case PRINT_C_INTMAX: + intmaxptr = va_arg(args, INTMAX_T *); + *intmaxptr = len; + break; + case PRINT_C_PTRDIFF: + ptrdiffptr = va_arg(args, PTRDIFF_T *); + *ptrdiffptr = len; + break; + default: + intptr = va_arg(args, int *); + *intptr = len; + break; + } + break; + case '%': /* Print a "%" character verbatim. */ + OUTCHAR(str, len, size, ch); + break; + default: /* Skip other characters. */ + break; + } + ch = *format++; + state = PRINT_S_DEFAULT; + base = cflags = flags = width = 0; + precision = -1; + break; + } +out: + if (len < size) + str[len] = '\0'; + else if (size > 0) + str[size - 1] = '\0'; + + if (overflow || len >= INT_MAX) { + return -1; + } + return (int)len; +} + +static void +fmtstr(char *str, size_t *len, size_t size, const char *value, int width, + int precision, int flags) +{ + int padlen, strln; /* Amount to pad. */ + int noprecision = (precision == -1); + + if (value == NULL) /* We're forgiving. */ + value = "(null)"; + + /* If a precision was specified, don't read the string past it. */ + for (strln = 0; value[strln] != '\0' && + (noprecision || strln < precision); strln++) + continue; + + if ((padlen = width - strln) < 0) + padlen = 0; + if (flags & PRINT_F_MINUS) /* Left justify. */ + padlen = -padlen; + + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + while (*value != '\0' && (noprecision || precision-- > 0)) { + OUTCHAR(str, *len, size, *value); + value++; + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width, + int precision, int flags) +{ + UINTMAX_T uvalue; + char iconvert[MAX_CONVERT_LENGTH]; + char sign = 0; + char hexprefix = 0; + int spadlen = 0; /* Amount to space pad. */ + int zpadlen = 0; /* Amount to zero pad. */ + int pos; + int separators = (flags & PRINT_F_QUOTE); + int noprecision = (precision == -1); + + if (flags & PRINT_F_UNSIGNED) + uvalue = value; + else { + uvalue = (value >= 0) ? value : -value; + if (value < 0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + } + + pos = convert(uvalue, iconvert, sizeof(iconvert), base, + flags & PRINT_F_UP); + + if (flags & PRINT_F_NUM && uvalue != 0) { + /* + * C99 says: "The result is converted to an `alternative form'. + * For `o' conversion, it increases the precision, if and only + * if necessary, to force the first digit of the result to be a + * zero (if the value and precision are both 0, a single 0 is + * printed). For `x' (or `X') conversion, a nonzero result has + * `0x' (or `0X') prefixed to it." (7.19.6.1, 6) + */ + switch (base) { + case 8: + if (precision <= pos) + precision = pos + 1; + break; + case 16: + hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x'; + break; + } + } + + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(pos); + + zpadlen = precision - pos - separators; + spadlen = width /* Minimum field width. */ + - separators /* Number of separators. */ + - MAX(precision, pos) /* Number of integer digits. */ + - ((sign != 0) ? 1 : 0) /* Will we print a sign? */ + - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */ + + if (zpadlen < 0) + zpadlen = 0; + if (spadlen < 0) + spadlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a + * precision is specified, the `0' flag is ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justify. */ + spadlen = -spadlen; + else if (flags & PRINT_F_ZERO && noprecision) { + zpadlen += spadlen; + spadlen = 0; + } + while (spadlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + if (hexprefix != 0) { /* A "0x" or "0X" prefix. */ + OUTCHAR(str, *len, size, '0'); + OUTCHAR(str, *len, size, hexprefix); + } + while (zpadlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + zpadlen--; + } + while (pos > 0) { /* The actual digits. */ + pos--; + OUTCHAR(str, *len, size, iconvert[pos]); + if (separators > 0 && pos > 0 && pos % 3 == 0) + printsep(str, len, size); + } + while (spadlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen++; + } +} + +static void +fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width, + int precision, int flags, int *overflow) +{ + LDOUBLE ufvalue; + UINTMAX_T intpart; + UINTMAX_T fracpart; + UINTMAX_T mask; + const char *infnan = NULL; + char iconvert[MAX_CONVERT_LENGTH]; + char fconvert[MAX_CONVERT_LENGTH]; + char econvert[4]; /* "e-12" (without nul-termination). */ + char esign = 0; + char sign = 0; + int leadfraczeros = 0; + int exponent = 0; + int emitpoint = 0; + int omitzeros = 0; + int omitcount = 0; + int padlen = 0; + int epos = 0; + int fpos = 0; + int ipos = 0; + int separators = (flags & PRINT_F_QUOTE); + int estyle = (flags & PRINT_F_TYPE_E); +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + struct lconv *lc = localeconv(); +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + + /* + * AIX' man page says the default is 0, but C99 and at least Solaris' + * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX + * defaults to 6. + */ + if (precision == -1) + precision = 6; + + if (fvalue < 0.0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + + if (ISNAN(fvalue)) + infnan = (flags & PRINT_F_UP) ? "NAN" : "nan"; + else if (ISINF(fvalue)) + infnan = (flags & PRINT_F_UP) ? "INF" : "inf"; + + if (infnan != NULL) { + if (sign != 0) + iconvert[ipos++] = sign; + while (*infnan != '\0') + iconvert[ipos++] = *infnan++; + fmtstr(str, len, size, iconvert, width, ipos, flags); + return; + } + + /* "%e" (or "%E") or "%g" (or "%G") conversion. */ + if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) { + if (flags & PRINT_F_TYPE_G) { + /* + * For "%g" (and "%G") conversions, the precision + * specifies the number of significant digits, which + * includes the digits in the integer part. The + * conversion will or will not be using "e-style" (like + * "%e" or "%E" conversions) depending on the precision + * and on the exponent. However, the exponent can be + * affected by rounding the converted value, so we'll + * leave this decision for later. Until then, we'll + * assume that we're going to do an "e-style" conversion + * (in order to get the exponent calculated). For + * "e-style", the precision must be decremented by one. + */ + precision--; + /* + * For "%g" (and "%G") conversions, trailing zeros are + * removed from the fractional portion of the result + * unless the "#" flag was specified. + */ + if (!(flags & PRINT_F_NUM)) + omitzeros = 1; + } + exponent = getexponent(fvalue); + estyle = 1; + } + +again: + /* + * Sorry, we only support 9, 19, or 38 digits (that is, the number of + * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value + * minus one) past the decimal point due to our conversion method. + */ + switch (sizeof(UINTMAX_T)) { + case 16: + if (precision > 38) + precision = 38; + break; + case 8: + if (precision > 19) + precision = 19; + break; + default: + if (precision > 9) + precision = 9; + break; + } + + ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue; + if (estyle) /* We want exactly one integer digit. */ + ufvalue /= mypow10(exponent); + + if ((intpart = cast(ufvalue)) == UINTMAX_MAX) { + *overflow = 1; + return; + } + + /* + * Factor of ten with the number of digits needed for the fractional + * part. For example, if the precision is 3, the mask will be 1000. + */ + mask = (UINTMAX_T)mypow10(precision); + /* + * We "cheat" by converting the fractional part to integer by + * multiplying by a factor of ten. + */ + if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) { + /* + * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000 + * (because precision = 3). Now, myround(1000 * 0.99962) will + * return 1000. So, the integer part must be incremented by one + * and the fractional part must be set to zero. + */ + intpart++; + fracpart = 0; + if (estyle && intpart == 10) { + /* + * The value was rounded up to ten, but we only want one + * integer digit if using "e-style". So, the integer + * part must be set to one and the exponent must be + * incremented by one. + */ + intpart = 1; + exponent++; + } + } + + /* + * Now that we know the real exponent, we can check whether or not to + * use "e-style" for "%g" (and "%G") conversions. If we don't need + * "e-style", the precision must be adjusted and the integer and + * fractional parts must be recalculated from the original value. + * + * C99 says: "Let P equal the precision if nonzero, 6 if the precision + * is omitted, or 1 if the precision is zero. Then, if a conversion + * with style `E' would have an exponent of X: + * + * - if P > X >= -4, the conversion is with style `f' (or `F') and + * precision P - (X + 1). + * + * - otherwise, the conversion is with style `e' (or `E') and precision + * P - 1." (7.19.6.1, 8) + * + * Note that we had decremented the precision by one. + */ + if (flags & PRINT_F_TYPE_G && estyle && + precision + 1 > exponent && exponent >= -4) { + precision -= exponent; + estyle = 0; + goto again; + } + + if (estyle) { + if (exponent < 0) { + exponent = -exponent; + esign = '-'; + } else + esign = '+'; + + /* + * Convert the exponent. The sizeof(econvert) is 4. So, the + * econvert buffer can hold e.g. "e+99" and "e-99". We don't + * support an exponent which contains more than two digits. + * Therefore, the following stores are safe. + */ + epos = convert(exponent, econvert, 2, 10, 0); + /* + * C99 says: "The exponent always contains at least two digits, + * and only as many more digits as necessary to represent the + * exponent." (7.19.6.1, 8) + */ + if (epos == 1) + econvert[epos++] = '0'; + econvert[epos++] = esign; + econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e'; + } + + /* Convert the integer part and the fractional part. */ + ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0); + if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */ + fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0); + + leadfraczeros = precision - fpos; + + if (omitzeros) { + if (fpos > 0) /* Omit trailing fractional part zeros. */ + while (omitcount < fpos && fconvert[omitcount] == '0') + omitcount++; + else { /* The fractional part is zero, omit it completely. */ + omitcount = precision; + leadfraczeros = 0; + } + precision -= omitcount; + } + + /* + * Print a decimal point if either the fractional part is non-zero + * and/or the "#" flag was specified. + */ + if (precision > 0 || flags & PRINT_F_NUM) + emitpoint = 1; + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(ipos); + + padlen = width /* Minimum field width. */ + - ipos /* Number of integer digits. */ + - epos /* Number of exponent characters. */ + - precision /* Number of fractional digits. */ + - separators /* Number of group separators. */ + - (emitpoint ? 1 : 0) /* Will we print a decimal point? */ + - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */ + + if (padlen < 0) + padlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justifty. */ + padlen = -padlen; + else if (flags & PRINT_F_ZERO && padlen > 0) { + if (sign != 0) { /* Sign. */ + OUTCHAR(str, *len, size, sign); + sign = 0; + } + while (padlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + padlen--; + } + } + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + while (ipos > 0) { /* Integer part. */ + ipos--; + OUTCHAR(str, *len, size, iconvert[ipos]); + if (separators > 0 && ipos > 0 && ipos % 3 == 0) + printsep(str, len, size); + } + if (emitpoint) { /* Decimal point. */ +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + if (lc->decimal_point != NULL && *lc->decimal_point != '\0') + OUTCHAR(str, *len, size, *lc->decimal_point); + else /* We'll always print some decimal point character. */ +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + OUTCHAR(str, *len, size, '.'); + } + while (leadfraczeros > 0) { /* Leading fractional part zeros. */ + OUTCHAR(str, *len, size, '0'); + leadfraczeros--; + } + while (fpos > omitcount) { /* The remaining fractional part. */ + fpos--; + OUTCHAR(str, *len, size, fconvert[fpos]); + } + while (epos > 0) { /* Exponent. */ + epos--; + OUTCHAR(str, *len, size, econvert[epos]); + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +printsep(char *str, size_t *len, size_t size) +{ +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + struct lconv *lc = localeconv(); + int i; + + if (lc->thousands_sep != NULL) + for (i = 0; lc->thousands_sep[i] != '\0'; i++) + OUTCHAR(str, *len, size, lc->thousands_sep[i]); + else +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + OUTCHAR(str, *len, size, ','); +} + +static int +getnumsep(int digits) +{ + int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3; +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + int strln; + struct lconv *lc = localeconv(); + + /* We support an arbitrary separator length (including zero). */ + if (lc->thousands_sep != NULL) { + for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++) + continue; + separators *= strln; + } +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + return separators; +} + +static int +getexponent(LDOUBLE value) +{ + LDOUBLE tmp = (value >= 0.0) ? value : -value; + int exponent = 0; + + /* + * We check for 99 > exponent > -99 in order to work around possible + * endless loops which could happen (at least) in the second loop (at + * least) if we're called with an infinite value. However, we checked + * for infinity before calling this function using our ISINF() macro, so + * this might be somewhat paranoid. + */ + while (tmp < 1.0 && tmp > 0.0 && --exponent > -99) + tmp *= 10; + while (tmp >= 10.0 && ++exponent < 99) + tmp /= 10; + + return exponent; +} + +static int +convert(UINTMAX_T value, char *buf, size_t size, int base, int caps) +{ + const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef"; + size_t pos = 0; + + /* We return an unterminated buffer with the digits in reverse order. */ + do { + buf[pos++] = digits[value % base]; + value /= base; + } while (value != 0 && pos < size); + + return (int)pos; +} + +static UINTMAX_T +cast(LDOUBLE value) +{ + UINTMAX_T result; + + /* + * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be + * represented exactly as an LDOUBLE value (but is less than LDBL_MAX), + * it may be increased to the nearest higher representable value for the + * comparison (cf. C99: 6.3.1.4, 2). It might then equal the LDOUBLE + * value although converting the latter to UINTMAX_T would overflow. + */ + if (value >= UINTMAX_MAX) + return UINTMAX_MAX; + + result = (UINTMAX_T)value; + /* + * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to + * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates + * the standard). Sigh. + */ + return (result <= value) ? result : result - 1; +} + +static UINTMAX_T +myround(LDOUBLE value) +{ + UINTMAX_T intpart = cast(value); + + return ((value -= intpart) < 0.5) ? intpart : intpart + 1; +} + +static LDOUBLE +mypow10(int exponent) +{ + LDOUBLE result = 1; + + while (exponent > 0) { + result *= 10; + exponent--; + } + while (exponent < 0) { + result /= 10; + exponent++; + } + return result; +} +#endif /* !HAVE_VSNPRINTF */ + +#if !HAVE_VASPRINTF +#if NEED_MYMEMCPY +void * +mymemcpy(void *dst, void *src, size_t len) +{ + const char *from = src; + char *to = dst; + + /* No need for optimization, we use this only to replace va_copy(3). */ + while (len-- > 0) + *to++ = *from++; + return dst; +} +#endif /* NEED_MYMEMCPY */ + +int +rpl_vasprintf(char **ret, const char *format, va_list ap) +{ + size_t size; + int len; + va_list aq; + + VA_COPY(aq, ap); + len = vsnprintf(NULL, 0, format, aq); + VA_END_COPY(aq); + if (len < 0 || (*ret = malloc(size = len + 1)) == NULL) + return -1; + return vsnprintf(*ret, size, format, ap); +} +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_SNPRINTF +#if HAVE_STDARG_H +int +rpl_snprintf(char *str, size_t size, const char *format, ...) +#else +int +rpl_snprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char *str; + size_t size; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, str, char *); + VA_SHIFT(ap, size, size_t); + VA_SHIFT(ap, format, const char *); + len = vsnprintf(str, size, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_SNPRINTF */ + +#if !HAVE_ASPRINTF +#if HAVE_STDARG_H +int +rpl_asprintf(char **ret, const char *format, ...) +#else +int +rpl_asprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char **ret; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, ret, char **); + VA_SHIFT(ap, format, const char *); + len = vasprintf(ret, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_ASPRINTF */ +#else /* Dummy declaration to avoid empty translation unit warnings. */ +int main(void); +#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */ + + +/* vim: set joinspaces textwidth=80: */ |