From 89498d01531cd515c769e570bf799c39fbafc8fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 7 Oct 2009 22:36:43 +0100 Subject: llvmpipe: import experimental softpipe rasterizer code, wip binning code WIP, doesn't build or run. Rasterizer code is based on Nick Capens' devmaster posts and the Larrabee articles, but currently doesn't share either the performance or correctness of either... --- src/gallium/drivers/llvmpipe/Makefile | 2 - src/gallium/drivers/llvmpipe/SConscript | 2 - src/gallium/drivers/llvmpipe/lp_context.c | 26 +- src/gallium/drivers/llvmpipe/lp_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_prim_setup.c | 190 --- src/gallium/drivers/llvmpipe/lp_prim_setup.h | 85 -- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 105 +- src/gallium/drivers/llvmpipe/lp_prim_vbuf.h | 4 +- src/gallium/drivers/llvmpipe/lp_rasterizer.c | 157 +++ src/gallium/drivers/llvmpipe/lp_rasterizer.h | 112 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 1432 +-------------------- src/gallium/drivers/llvmpipe/lp_setup.h | 17 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 140 ++ src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 7 + src/gallium/drivers/llvmpipe/lp_setup_tri.c | 755 +++++++++++ src/gallium/drivers/llvmpipe/lp_state_derived.c | 25 +- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 353 ----- src/gallium/drivers/llvmpipe/lp_tile_cache.h | 71 - 18 files changed, 1276 insertions(+), 2212 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.c create mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_context.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_rasterize.c create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_tri.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_cache.c delete mode 100644 
src/gallium/drivers/llvmpipe/lp_tile_cache.h diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 21aff1967a..8f05e5a6fd 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -32,7 +32,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ @@ -51,7 +50,6 @@ C_SOURCES = \ lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ - lp_tile_cache.c \ lp_tile_soa.c include ../../Makefile.template diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 13cd465838..344b246337 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -45,7 +45,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', @@ -64,7 +63,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', - 'lp_tile_cache.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef43..57e71f3e98 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if (debug_get_bool_option( "LP_NO_VBUF", 
FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 8d5a0d4f1f..0b77ae58d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -124,9 +124,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. 
- */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - 
* Create a new primitive setup/render stage. - */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae6375..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. 
- */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d..e244ac9087 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -39,7 +39,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" #include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - 
uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,12 +136,8 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); + struct setup_context *setup_ctx = cvbr->setup; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); - llvmpipe_setup_prepare( setup_ctx ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? 
If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. 
*/ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 6c4e6063e6..0676e2f42a 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_rasterizer.c new file mode 100644 index 0000000000..089ea59729 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.c @@ -0,0 +1,157 @@ + +struct lp_rasterizer { + + /* We can choose whatever layout for the internal tile storage we + * prefer: + */ + struct { + unsigned color[TILESIZE][TILESIZE]; + unsigned depth[TILESIZE][TILESIZE]; + char stencil[TILESIZE][TILESIZE]; + } tile; + + + unsigned x; + unsigned y; + + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; +}; + +struct lp_rasterizer *lp_rast_create( void ) +{ + return CALLOC_STRUCT(lp_rasterizer); +} + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil) +{ + pipe_surface_reference(&rast->state.color, color); + pipe_surface_reference(&rast->state.depth, depth); + rast->state.clear_color = util_pack_8888(clear_color); + rast->state.clear_depth = clear_depth * 0xffffffff; + rast->state.clear_stencil = clear_stencil; +} + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ) +{ + rast->x = x; + rast->y = y; +} + +void lp_rast_clear_color( struct lp_rasterizer *rast ) +{ + const unsigned clear_color = rast->state.clear_color; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_color; +} + +void lp_rast_clear_depth( struct lp_rasterizer *rast ) +{ + 
const unsigned clear_depth = rast->state.clear_depth; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_depth; +} + +void lp_rast_clear_stencil( struct lp_rasterizer *rast ) +{ + const unsigned clear_stencil = rast->state.clear_stencil; + + memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); +} + +void lp_rast_load_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load colors from surface */ +} + +void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load depth (and stencil?) from surface */ +} + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *rast, + const struct lp_rast_state *state ) +{ + rast->shader_state = state; +} + +void lp_rast_triangle( struct lp_rasterizer *rast, + const struct lp_rast_triangle *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Scan the tile in 4x4 chunks (?) 
and figure out which bits to + * rasterize: + */ + +} + +void lp_rast_shade_tile( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Use the existing preference for 8x2 (four quads) shading: + */ + for (i = 0; i < TILESIZE; i += 8) { + for (j = 0; j < TILESIZE; j += 2) { + rast->shader_state.shade( inputs->jc, + rast->x + i, + rast->y + j, + rast->quads, 4 ); + } + } +} + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store colors to surface */ +} + +void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store depth/stencil to surface */ +} + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + FREE(rast); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.h b/src/gallium/drivers/llvmpipe/lp_rasterizer.h new file mode 100644 index 0000000000..b3ae06a116 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.h @@ -0,0 +1,112 @@ + +/* Initially create and program a single rasterizer directly. Later + * will want multiple of these, one or two per core. At that stage + * will probably pass command buffers into the rasterizers rather than + * individual function calls like this. 
+ */ +struct lp_rasterizer; + +struct lp_rast_state { + /* State: + */ + struct lp_jit_context jc; + + /* Shader itself: + */ +}; + +/* Coefficients necessary to run the shader at a given location: + */ +struct lp_rast_shader_inputs { + + /* Current rasterizer state: + */ + const struct lp_rast_state *state; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + + +/* Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* State to run the shader: */ + struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( void ); + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil); + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ); + +void lp_rast_clear_color( struct lp_rasterizer * ); + +void lp_rast_clear_zstencil( struct lp_rasterizer * ); + +void lp_rast_load_color( struct lp_rasterizer * ); + +void lp_rast_load_zstencil( struct lp_rasterizer * ); + + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *, + const struct lp_rast_state * ); + +void lp_rast_triangle( struct lp_rasterizer *, + const struct lp_rast_triangle * ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + const struct lp_rast_shader_inputs * ); + +/* End of tile: + */ +void lp_rast_store_color( struct 
lp_rasterizer * ); + +void lp_rast_store_zstencil( struct lp_rasterizer * ); + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer * ); + diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 60107214df..8c67524506 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,15 +26,15 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief Primitive rasterization/rendering (points, lines) * * \author Keith Whitwell * \author Brian Paul */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" +#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" @@ -44,1397 +44,49 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "lp_bld_debug.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" #define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -#define MAX_QUADS 16 - - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_context { - struct llvmpipe_context *llvmpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. 
- */ - const float (*vmax)[4]; - const float (*vmid)[4]; - const float (*vmin)[4]; - const float (*vprovoke)[4]; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - int facing; - - struct quad_header quad[MAX_QUADS]; - struct quad_header *quad_ptrs[MAX_QUADS]; - unsigned count; - - struct quad_interp_coef coef; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif - - unsigned winding; /* which winding to cull */ -}; - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quads(struct llvmpipe_context *llvmpipe, - struct quad_header *quads[], - unsigned nr) -{ - struct lp_fragment_shader *fs = llvmpipe->fs; - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; - uint8_t *tile; - uint8_t *color; - void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; - - assert(fs->current); - if(!fs->current) - return; - - /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); - assert(x % TILE_VECTOR_WIDTH == 0); - assert(y % TILE_VECTOR_HEIGHT == 0); - for (q = 0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } - - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? 
~0 : 0; - - /* color buffer */ - if(llvmpipe->framebuffer.nr_cbufs >= 1 && - llvmpipe->framebuffer.cbufs[0]) { - tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); - } - else - color = NULL; - - /* depth buffer */ - if(llvmpipe->zsbuf_map) { - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = llvmpipe->zsbuf_map + - y*llvmpipe->zsbuf_transfer->stride + - 2*x*llvmpipe->zsbuf_transfer->block.size; - } - else - depth = NULL; - - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 16)); - - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); - - /* run shader */ - fs->current->jit_function( &llvmpipe->jit_context, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); -} - - - - -/** - * Do triangle cull test using tri determinant (sign indicates orientation) - * \return true if triangle is to be culled. - */ -static INLINE boolean -cull_tri(const struct setup_context *setup, float det) -{ - if (det != 0) { - /* if (det < 0 then Z points toward camera and triangle is - * counter-clockwise winding. - */ - unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; - - if ((winding & setup->winding) == 0) - return FALSE; - } - - /* Culled: - */ - return TRUE; -} - - - -/** - * Clip setup->quad against the scissor/surface bounds. 
- */ -static INLINE void -quad_clip( struct setup_context *setup, struct quad_header *quad ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (quad->input.x0 >= maxx || - quad->input.y0 >= maxy || - quad->input.x0 + 1 < minx || - quad->input.y0 + 1 < miny) { - /* totally clipped */ - quad->inout.mask = 0x0; - return; - } - if (quad->input.x0 < minx) - quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (quad->input.y0 < miny) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (quad->input.x0 == maxx - 1) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (quad->input.y0 == maxy - 1) - quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~(2-1); -} - -static INLINE int block_x( int x ) -{ - return x & ~(TILE_VECTOR_WIDTH - 1); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) -{ - quad_clip( setup, quad ); - - if (quad->inout.mask) { - struct llvmpipe_context *lp = setup->llvmpipe; - -#if 1 - /* XXX: The blender expects 4 quads. This is far from efficient, but - * until we codegenerate single-quad variants of the fragment pipeline - * we need this hack. 
*/ - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[nr_quads]; - struct quad_header *quad_ptrs[nr_quads]; - int x0 = block_x(quad->input.x0); - unsigned i; - - for(i = 0; i < nr_quads; ++i) { - int x = x0 + 2*i; - if(x == quad->input.x0) - memcpy(&quads[i], quad, sizeof quads[i]); - else { - memset(&quads[i], 0, sizeof quads[i]); - quads[i].input.x0 = x; - quads[i].input.y0 = quad->input.y0; - quads[i].coef = quad->coef; - } - quad_ptrs[i] = &quads[i]; - } - - shade_quads( lp, quad_ptrs, nr_quads ); -#else - shade_quads( lp, &quad, 1 ); -#endif - } -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_context *setup ) -{ - const int step = TILE_VECTOR_WIDTH; - const int xleft0 = setup->span.left[0]; - const int xleft1 = setup->span.left[1]; - const int xright0 = setup->span.right[0]; - const int xright1 = setup->span.right[1]; - - - int minleft = block_x(MIN2(xleft0, xleft1)); - int maxright = MAX2(xright0, xright1); - int x; - - for (x = minleft; x < maxright; x += step) { - unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); - unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); - unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); - unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); - unsigned lx = x; - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - unsigned q = 0; - - unsigned skipmask_left0 = (1U << skip_left0) - 1U; - unsigned skipmask_left1 = (1U << skip_left1) - 1U; - - /* These calculations fail when step == 32 and skip_right == 0. 
- */ - unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); - unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); - - unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; - unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; - - if (mask0 | mask1) { - for(q = 0; q < nr_quads; ++q) { - unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); - setup->quad[q].input.x0 = lx; - setup->quad[q].input.y0 = setup->span.y; - setup->quad[q].inout.mask = quadmask; - setup->quad_ptrs[q] = &setup->quad[q]; - mask0 >>= 2; - mask1 >>= 2; - lx += 2; - } - assert(!(mask0 | mask1)); - - shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads ); - } - } - - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ -} - - -#if DEBUG_VERTS -static void print_vertex(const struct setup_context *setup, - const float (*v)[4]) -{ - int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v[i][0], v[i][1], v[i][2], v[i][3]); - if (util_is_inf_or_nan(v[i][0])) { - debug_printf(" NaN!\n"); - } - } -} -#endif - -/** - * Sort the vertices from top to bottom order, setting up the triangle - * edge fields (ebot, emaj, etop). 
- * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise - */ -static boolean setup_sort_vertices( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0[0][1]; - float y1 = v1[0][1]; - float y2 = v2[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; - setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; - setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. 
- */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, det ); - */ - if (util_is_inf_or_nan(setup->oneoverarea)) - return FALSE; - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->facing = - ((det > 0.0) ^ - (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); - - return TRUE; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. 
- */ - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - setup->coef.dadx[0][i] = 0; - setup->coef.dady[0][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0; - setup->coef.dady[1 + attrib][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; - } -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static void tri_linear_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. 
- */ -static void tri_persp_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* premultiply by 1/w (v[0][3] is always W): - */ - float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; - float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin[vertSlot][i], - setup->vmid[vertSlot][i], - setup->vmax[vertSlot][i] - ); - */ - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
- */ -static void -setup_fragcoord_coeff(struct setup_context *setup, uint slot) -{ - /*X*/ - setup->coef.a0[1 + slot][0] = 0; - setup->coef.dadx[1 + slot][0] = 1.0; - setup->coef.dady[1 + slot][0] = 0.0; - /*Y*/ - setup->coef.a0[1 + slot][1] = 0.0; - setup->coef.dadx[1 + slot][1] = 0.0; - setup->coef.dady[1 + slot][1] = 1.0; - /*Z*/ - setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; - setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; - setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; - /*W*/ - setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; - setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; - setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_context *setup ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_pos_coeff(setup, 0, 2); - tri_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - tri_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + 
fragSlot][0] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_context *setup ) -{ - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; - - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; - - setup->emaj.sy = ceilf(vmin_y); - setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = ceilf(vmid_y); - setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = ceilf(vmin_y); - setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_context *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - if (start_y < miny) - start_y = miny; - - finish_y = sy + lines; - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. 
luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. 
- */ -void llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - float det; - -#if DEBUG_VERTS - debug_printf("Setup triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - if (setup->llvmpipe->no_rast) - return; - - det = calc_det(v0, v1, v2); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - if (cull_tri( setup, det )) - return; - - if (!setup_sort_vertices( setup, det, v0, v1, v2 )) - return; - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. 
- */ -static void -linear_pos_coeff(struct setup_context *setup, - uint vertSlot, uint i) -{ - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. 
- */ -static void -line_persp_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE boolean -setup_line_coefficients(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - float area; - - /* use setup->vmin, vmax to point to vertices */ - if (llvmpipe->rasterizer->flatshade_first) - setup->vprovoke = v0; - else - setup->vprovoke = v1; - setup->vmin = v0; - setup->vmax = v1; - - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - - /* NOTE: this is not really area but something proportional to it */ - area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; - if (area == 0.0f || util_is_inf_or_nan(area)) - return FALSE; - setup->oneoverarea = 1.0f / area; - - /* z and w are done by linear interpolation: - */ - linear_pos_coeff(setup, 0, 2); - linear_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; 
fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - line_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - line_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - return TRUE; -} - - -/** - * Plot a pixel in a line segment. +/* Stubs for lines & points for now: */ -static INLINE void -plot(struct setup_context *setup, int x, int y) +void +llvmpipe_setup_point(struct setup_context *setup, + const float (*v0)[4]) { - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad[0].input.x0 || - quadY != setup->quad[0].input.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad[0].input.x0 != -1) - clip_emit_quad( setup, &setup->quad[0] ); - - setup->quad[0].input.x0 = quadX; - setup->quad[0].input.y0 = quadY; - setup->quad[0].inout.mask = 0x0; - } - - setup->quad[0].inout.mask |= mask; } - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. 
- */ void llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - int x0 = (int) v0[0][0]; - int x1 = (int) v1[0][0]; - int y0 = (int) v0[0][1]; - int y1 = (int) v1[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - -#if DEBUG_VERTS - debug_printf("Setup line:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); -#endif - - if (setup->llvmpipe->no_rast) - return; - - if (dx == 0 && dy == 0) - return; - - if (!setup_line_coefficients(setup, v0, v1)) - return; - - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); - - setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; - setup->quad[0].inout.mask = 0x0; - - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. 
- */ - setup->quad[0].input.coverage[0] = - setup->quad[0].input.coverage[1] = - setup->quad[0].input.coverage[2] = - setup->quad[0].input.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad[0].inout.mask) { - clip_emit_quad( setup, &setup->quad[0] ); - } -} - - -static void -point_persp_coeff(struct setup_context *setup, - const float (*vert)[4], - unsigned attrib, - uint vertSlot) + const float (*v0)[4], + const float (*v1)[4]) { - unsigned i; - for(i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0.0F; - setup->coef.dady[1 + attrib][i] = 0.0F; - setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; - } } -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. +/* Called after statechange, before emitting primitives. If binning + * is active, this function should store relevant state in the binning + * context. + * + * That includes: + * - current fragment shader function + * - bound constant buffer contents + * - bound textures + * - blend color + * - etc. + * + * Basically everything needed at some point in the future to + * rasterize triangles for the current state. + * + * Additionally this will set up the state needed for the rasterizer + * to process and bin incoming triangles. 
That would include such + * things as: + * - cull mode + * - ??? + * - etc. + * */ -void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const int sizeAttr = setup->llvmpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0[sizeAttr][0] - : setup->llvmpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; - const float x = v0[0][0]; /* Note: data[0] is always position */ - const float y = v0[0][1]; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - -#if DEBUG_VERTS - debug_printf("Setup point:\n"); - print_vertex(setup, v0); -#endif - - if (llvmpipe->no_rast) - return; - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. 
- */ - setup->vprovoke = v0; - - /* setup Z, W */ - const_pos_coeff(setup, 0, 2); - const_pos_coeff(setup, 0, 3); - - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad[0].input.x0 = (int) x - ix; - setup->quad[0].input.y0 = (int) y - iy; - setup->quad[0].inout.mask = (1 << ix) << (2 * iy); - clip_emit_quad( setup, &setup->quad[0] ); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad[0].inout.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - 
setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_RIGHT; - } - - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad[0].inout.mask) { - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, 
turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad[0].inout.mask = mask; - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } -} - -void llvmpipe_setup_prepare( struct setup_context *setup ) +void setup_prepare( struct setup_context *setup ) { struct llvmpipe_context *lp = setup->llvmpipe; @@ -1442,6 +94,8 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) llvmpipe_update_derived(lp); } + lp->quad.first->begin( lp->quad.first ); + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { @@ -1452,38 +106,28 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) /* 'draw' will do culling */ setup->winding = PIPE_WINDING_NONE; } + + setup_prepare_tri( setup->llvmpipe ); } -void llvmpipe_setup_destroy_context( struct setup_context *setup ) +void setup_destroy_context( struct setup_context *setup ) { - align_free( setup ); + FREE( setup ); } /** * Create a new primitive setup/render stage. 
*/ -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) { - struct setup_context *setup; + struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i; - setup = align_malloc(sizeof(struct setup_context), 16); - if (!setup) - return NULL; - - memset(setup, 0, sizeof *setup); setup->llvmpipe = llvmpipe; - for (i = 0; i < MAX_QUADS; i++) { - setup->quad[i].coef = &setup->coef; - } - - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ - return setup; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 89c43da046..05aaaf83b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -30,11 +30,8 @@ struct setup_context; struct llvmpipe_context; -void -llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +/* Note, not using setup_context currently + */ void llvmpipe_setup_line(struct setup_context *setup, @@ -46,8 +43,12 @@ llvmpipe_setup_point( struct setup_context *setup, const float (*v0)[4] ); -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ); -void llvmpipe_setup_prepare( struct setup_context *setup ); -void llvmpipe_setup_destroy_context( struct setup_context *setup ); +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); + +void setup_prepare( struct setup_context *setup ); + +void setup_destroy_context( struct setup_context *setup ); + +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h new file mode 100644 index 0000000000..848705e099 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -0,0 
+1,140 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef LP_SETUP_CONTEXT_H +#define LP_SETUP_CONTEXT_H + +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; + +/* Shade tile points directly at this: + */ +struct shader_inputs { + /* Some way of updating rasterizer state: + */ + /* ??? 
*/ + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + +/* Shade triangle points at this: + */ +struct shade_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + struct shader_inputs inputs; +}; + +struct bin_cmd { + enum { + CMD_END = 0, + CMD_CLEAR, + CMD_LOAD_TILE, + CMD_SHADE_TILE, + CMD_SHADE_TRIANGLE, + } cmd; + + union { + struct triangle *tri; + struct clear *clear; + } ptr; +}; + +struct cmd_block { + struct bin_cmd cmds[128]; + unsigned count; + struct cmd_block *next; +}; + +/* Triangles + */ +struct data_block { + ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; + unsigned count; + struct data_block *next; +}; + +/* Need to store the state at the time the triangle was drawn, at + * least as it is needed during rasterization. That would include at + * minimum the constant values referred to by the fragment shader, + * blend state, etc. Much of this is code-generated into the shader + * in llvmpipe -- may be easier to do this work there. 
+ */ +struct state_block { +}; + + +/** + * Basically all the data from a binner scene: + */ +struct binned_scene { + struct llvmpipe_context *llvmpipe; + + struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; + struct data_block *data; +}; + +static INLINE struct triangle *get_triangle( struct setup_context *setup ) +{ + if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) + return setup_triangle_from_new_block( setup ); + + return &setup->triangles[setup->triangles->count++]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c new file mode 100644 index 0000000000..5b4faf489b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c @@ -0,0 +1,7 @@ + +void +rasterize( struct llvmpipe_context *llvmpipe, + struct binned_scene *scene ) +{ + +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c new file mode 100644 index 0000000000..a09e0fa643 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -0,0 +1,755 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Recursive rasterization for triangles + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_quad_pipe.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define BLOCKSIZE 4 + +struct triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef position_coef; + + /* A run of pre-initialized quads: + */ + struct llvmpipe_context *llvmpipe; + struct quad_header quad[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
+ */ +static void constant_coef( struct tgsi_interp_coef *coef, + const float (*v3)[4], + unsigned vert_attr, + unsigned i ) +{ + coef->a0[i] = v3[vert_attr][i]; + coef->dadx[i] = 0; + coef->dady[i] = 0; +} + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up losing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment.
+ */ +static void perspective_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct triangle *tri, unsigned slot) +{ + /*X*/ + tri->coef[slot].a0[0] = 0.0; + tri->coef[slot].dadx[0] = 1.0; + tri->coef[slot].dady[0] = 0.0; + /*Y*/ + tri->coef[slot].a0[1] = 0.0; + tri->coef[slot].dadx[1] = 0.0; + tri->coef[slot].dady[1] = 1.0; + /*Z*/ + tri->coef[slot].a0[2] = tri->position_coef.a0[2]; + tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; + tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + /*W*/ + tri->coef[slot].a0[3] = tri->position_coef.a0[3]; + tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; + tri->coef[slot].dady[3] = tri->position_coef.dady[3]; +} + + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. 
+ */ +static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, + struct triangle *tri, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface ) +{ + const struct lp_fragment_shader *fs = llvmpipe->fs; + const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + unsigned input; + + /* z and w are done by linear interpolation: + */ + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (input = 0; input < fs->info.num_inputs; input++) { + unsigned vert_attr = vinfo->attrib[input].src_index; + unsigned i; + + switch (vinfo->attrib[input].interp_mode) { + case INTERP_CONSTANT: + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(&tri->coef[input], v3, vert_attr, i); + break; + + case INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_POS: + setup_fragcoord_coef(tri, input); + break; + + default: + assert(0); + } + + if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + tri->coef[input].a0[0] = 1.0f - frontface; + tri->coef[input].dadx[0] = 0.0; + tri->coef[input].dady[0] = 0.0; + } + } +} + + + +/* XXX: do this by add/subtracting a large floating point number: + */ +static inline float subpixel_snap( float a ) +{ + int i = a * 16; + return (float)i * (1.0/16); +} + + +/* Convert 8x8 block into four runs of quads and render each in turn. 
+ */ +#if (BLOCKSIZE == 8) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int i; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + tri->quad[2].input.x0 = x + 4; + tri->quad[3].input.x0 = x + 6; + + for (i = 0; i < 4; i++, y += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + tri->quad[2].inout.mask = 0xf; + tri->quad[3].inout.mask = 0xf; + + tri->quad[0].input.y0 = y; + tri->quad[1].input.y0 = y; + tri->quad[2].input.y0 = y; + tri->quad[3].input.y0 = y; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + ptrs[2] = &tri->quad[2]; + ptrs[3] = &tri->quad[3]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); + } +} +#elif (BLOCKSIZE == 4) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + + for (iy = 0; iy < 4; iy += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + + tri->quad[0].input.y0 = y + iy; + tri->quad[1].input.y0 = y + iy; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); + } +} +#else +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x; + tri->quad[0].input.y0 = y; + tri->quad[0].inout.mask = 0xf; + + ptrs[0] = &tri->quad[0]; + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 ); +} +#endif + + +static void +do_quad( struct triangle *tri, + int x, int y, + float c1, float c2, float c3 ) +{ + struct quad_header *quad = &tri->quad[0]; + + float xstep1 = -tri->dy12; + float xstep2 = -tri->dy23; + float xstep3 = -tri->dy31; + + float ystep1 = tri->dx12; + float ystep2 = tri->dx23; + float ystep3 = tri->dx31; + + 
quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = 0; + + if (c1 > 0 && + c2 > 0 && + c3 > 0) + quad->inout.mask |= 1; + + if (c1 + xstep1 > 0 && + c2 + xstep2 > 0 && + c3 + xstep3 > 0) + quad->inout.mask |= 2; + + if (c1 + ystep1 > 0 && + c2 + ystep2 > 0 && + c3 + ystep3 > 0) + quad->inout.mask |= 4; + + if (c1 + ystep1 + xstep1 > 0 && + c2 + ystep2 + xstep2 > 0 && + c3 + ystep3 + xstep3 > 0) + quad->inout.mask |= 8; + + if (quad->inout.mask) + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 ); +} + +/* Evaluate each pixel in a block, generate a mask and possibly render + * the quad: + */ +static void +do_block( struct triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) +{ + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + int ix, iy; + + for (iy = 0; iy < BLOCKSIZE; iy += 2) { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (ix = 0; ix < BLOCKSIZE; ix += 2) { + + do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } +} + + + + +/* to avoid having to allocate power-of-four, square render targets, + * end up having a specialized version of the above that runs only at + * the topmost level. 
+ * + * at the topmost level there may be an arbitrary number of steps on + * either dimension, so this loop needs to be either separately + * code-generated and unrolled for each render target size, or kept as + * generic looping code: + */ + +#define MIN3(a,b,c) MIN2(MIN2(a,b),c) +#define MAX3(a,b,c) MAX2(MAX2(a,b),c) + +static void +do_triangle_ccw(struct llvmpipe_context *llvmpipe, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontfacing ) +{ + const int rt_width = llvmpipe->framebuffer.cbufs[0]->width; + const int rt_height = llvmpipe->framebuffer.cbufs[0]->height; + + const float y1 = subpixel_snap(v1[0][1]); + const float y2 = subpixel_snap(v2[0][1]); + const float y3 = subpixel_snap(v3[0][1]); + + const float x1 = subpixel_snap(v1[0][0]); + const float x2 = subpixel_snap(v2[0][0]); + const float x3 = subpixel_snap(v3[0][0]); + + struct triangle tri; + float area; + float c1, c2, c3; + int i; + int minx, maxx, miny, maxy; + + tri.llvmpipe = llvmpipe; + + + tri.dx12 = x1 - x2; + tri.dx23 = x2 - x3; + tri.dx31 = x3 - x1; + + tri.dy12 = y1 - y2; + tri.dy23 = y2 - y3; + tri.dy31 = y3 - y1; + + area = (tri.dx12 * tri.dy31 - + tri.dx31 * tri.dy12); + + /* Cull non-ccw and zero-sized triangles. + */ + if (area <= 0 || util_is_inf_or_nan(area)) + return; + + // Bounding rectangle + minx = util_iround(MIN3(x1, x2, x3) - .5); + maxx = util_iround(MAX3(x1, x2, x3) + .5); + miny = util_iround(MIN3(y1, y2, y3) - .5); + maxy = util_iround(MAX3(y1, y2, y3) + .5); + + /* Clamp to framebuffer (or tile) dimensions: + */ + miny = MAX2(0, miny); + minx = MAX2(0, minx); + maxy = MIN2(rt_height, maxy); + maxx = MIN2(rt_width, maxx); + + if (miny == maxy || minx == maxx) + return; + + /* The only divide in this code. Is it really needed?
+ */ + tri.oneoverarea = 1.0f / area; + + /* Setup parameter interpolants: + */ + setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing ); + + for (i = 0; i < Elements(tri.quad); i++) { + tri.quad[i].coef = tri.coef; + tri.quad[i].posCoef = &tri.position_coef; + } + + /* half-edge constants, will be iterated over the whole + * rendertarget. + */ + c1 = tri.dy12 * x1 - tri.dx12 * y1; + c2 = tri.dy23 * x2 - tri.dx23 * y2; + c3 = tri.dy31 * x3 - tri.dx31 * y3; + + /* correct for top-left fill convention: + */ + if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; + if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; + if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + tri.eo1 = 0; + if (tri.dy12 < 0) tri.eo1 -= tri.dy12; + if (tri.dx12 > 0) tri.eo1 += tri.dx12; + + tri.eo2 = 0; + if (tri.dy23 < 0) tri.eo2 -= tri.dy23; + if (tri.dx23 > 0) tri.eo2 += tri.dx23; + + tri.eo3 = 0; + if (tri.dy31 < 0) tri.eo3 -= tri.dy31; + if (tri.dx31 > 0) tri.eo3 += tri.dx31; + + /* Calculate trivial accept offsets from the above.
+ */ + tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; + tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; + tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + + minx &= ~(BLOCKSIZE-1); /* aligned blocks */ + miny &= ~(BLOCKSIZE-1); /* aligned blocks */ + + c1 += tri.dx12 * miny - tri.dy12 * minx; + c2 += tri.dx23 * miny - tri.dy23 * minx; + c3 += tri.dx31 * miny - tri.dy31 * minx; + + if ((miny & ~15) == (maxy & ~15) && + (minx & ~15) == (maxx & ~15)) + { + const int step = 2; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + int x, y; + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else + { + do_quad(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } + else + { + const int step = BLOCKSIZE; + + float ei1 = tri.ei1 * step; + float ei2 = tri.ei2 * step; + float ei3 = tri.ei3 * step; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + int x, y; + + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. 
+ * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + boolean in = false; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + /* do nothing */ + if (in) + break; + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + in = TRUE; + block_full(&tri, x, y); /* trivial accept */ + } + else + { + in = TRUE; + // block_full(&tri, x, y); /* trivial accept */ + do_block(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + +static void triangle_cw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface ); +} + +static void triangle_ccw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v0, v1, v2, llvmpipe->ccw_is_frontface ); +} + +static void triangle_both( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + if (ex * fy - ey * fx < 0) + triangle_ccw( llvmpipe, v0, v1, v2 ); + else + triangle_cw( llvmpipe, v0, v1, v2 ); +} + +static void triangle_nop( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +} + +/** + * Do setup for triangle rasterization, then render the triangle. 
+ */ +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) +{ + llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == + PIPE_WINDING_CW); + + switch (llvmpipe->rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + llvmpipe->triangle = triangle_both; + break; + case PIPE_WINDING_CCW: + llvmpipe->triangle = triangle_cw; + break; + case PIPE_WINDING_CW: + llvmpipe->triangle = triangle_ccw; + break; + default: + llvmpipe->triangle = triangle_nop; + break; + } +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65..31eaadda21 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,24 +67,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) const struct lp_fragment_shader *lpfs = llvmpipe->fs; const enum interp_mode colorInterp = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. 
+ */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c deleted file mode 100644 index ec3e002d62..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ /dev/null @@ -1,353 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. 
- * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_tile.h" -#include "util/u_rect.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tile_soa.h" -#include "lp_tile_cache.h" - - -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; - - -struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tile_cache *tc; - int maxLevels, maxTexSize; - - /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - maxTexSize = 1 << (maxLevels - 1); - assert(MAX_WIDTH >= maxTexSize); - - tc = CALLOC_STRUCT( llvmpipe_tile_cache ); - if(!tc) - return NULL; - - tc->screen = screen; - - return tc; -} - - -void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_screen *screen; - unsigned x, y; - - for (y = 0; y < MAX_HEIGHT; y += TILE_SIZE) { - for (x = 0; x < MAX_WIDTH; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->color) - align_free(tile->color); - } - } - - if (tc->transfer) { - screen = tc->transfer->texture->screen; - 
screen->tex_transfer_destroy(tc->transfer); - } - - FREE( tc ); -} - - -/** - * Specify the surface to cache. - */ -void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *ps) -{ - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; - - if (ps == tc->surface) - return; - - if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); - tc->transfer_map = NULL; - } - - screen->tex_transfer_destroy(tc->transfer); - tc->transfer = NULL; - } - - tc->surface = ps; - - if (ps) { - struct pipe_screen *screen = ps->texture->screen; - unsigned x, y; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, - ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, ps->width, ps->height); - - for (y = 0; y < ps->height; y += TILE_SIZE) { - for (x = 0; x < ps->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - tile->status = LP_TILE_STATUS_UNDEFINED; - - if(!tile->color) - tile->color = align_malloc( TILE_SIZE*TILE_SIZE*NUM_CHANNELS, 16 ); - } - } - } -} - - -/** - * Return the transfer being cached. - */ -struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc) -{ - return tc->surface; -} - - -void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); -} - - -void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } -} - - -/** - * Set a tile to a solid color. 
- */ -static void -clear_tile(struct llvmpipe_cached_tile *tile, - uint8_t clear_color[4]) -{ - if (clear_color[0] == clear_color[1] && - clear_color[1] == clear_color[2] && - clear_color[2] == clear_color[3]) { - memset(tile->color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); - } - else { - uint x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(tile->color, x, y, chan) = clear_color[chan]; - } -} - - -/** - * Flush the tile cache: write all dirty tiles back to the transfer. - * any tiles "flagged" as cleared will be "really" cleared. - */ -void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_transfer *pt = tc->transfer; - unsigned x, y; - - if(!pt) - return; - - assert(tc->transfer_map); - - /* push the tile to all positions marked as clear */ - for (y = 0; y < pt->height; y += TILE_SIZE) { - for (x = 0; x < pt->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->status != LP_TILE_STATUS_UNDEFINED) { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (!pipe_clip_tile(x, y, &w, &h, pt)) { - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* Actually clear the tiles which were flagged as being in a - * clear state. */ - util_fill_rect(tc->transfer_map, &pt->block, pt->stride, - x, y, w, h, - tc->clear_val); - break; - - case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->format, - tile->color, - tc->transfer_map, pt->stride, - x, y, w, h); - break; - - default: - assert(0); - break; - } - } - - tile->status = LP_TILE_STATUS_UNDEFINED; - } - } - } -} - - -/** - * Get a tile from the cache. 
- * \param x, y position of tile, in pixels - */ -void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ) -{ - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - struct pipe_transfer *pt = tc->transfer; - - assert(tc->surface); - assert(tc->transfer); - - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* don't get tile from framebuffer, just clear it */ - clear_tile(tile, tc->clear_color); - tile->status = LP_TILE_STATUS_DEFINED; - break; - - case LP_TILE_STATUS_UNDEFINED: { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - x &= ~(TILE_SIZE - 1); - y &= ~(TILE_SIZE - 1); - - if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->format, - tile->color, - tc->transfer_map, tc->transfer->stride, - x, y, w, h); - - tile->status = LP_TILE_STATUS_DEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - /* nothing to do */ - break; - } - - return tile->color; -} - - -/** - * When a whole surface is being cleared to a value we can avoid - * fetching tiles above. - * Save the color and set a 'clearflag' for each tile of the screen. 
- */ -void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue) -{ - struct pipe_transfer *pt = tc->transfer; - const unsigned w = pt->width; - const unsigned h = pt->height; - unsigned x, y, chan; - - for(chan = 0; chan < 4; ++chan) - tc->clear_color[chan] = float_to_ubyte(rgba[chan]); - - tc->clear_val = clearValue; - - /* push the tile to all positions marked as clear */ - for (y = 0; y < h; y += TILE_SIZE) { - for (x = 0; x < w; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - tile->status = LP_TILE_STATUS_CLEAR; - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h deleted file mode 100644 index 161bab3799..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ /dev/null @@ -1,71 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TILE_CACHE_H -#define LP_TILE_CACHE_H - - -#include "pipe/p_compiler.h" -#include "lp_tile_soa.h" - - -struct llvmpipe_tile_cache; /* opaque */ - - -extern struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *lps); - -extern struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue); - -extern void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ); - - -#endif /* LP_TILE_CACHE_H */ - -- cgit v1.2.3 From e529170c11d3cb5812aabeff0a6ee2d7a2ea66f2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 11:47:33 +0100 Subject: llvmpipe: more wipping --- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 119 ++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 129 ++++++++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 31 ++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 348 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rasterizer.c | 157 ---------- src/gallium/drivers/llvmpipe/lp_rasterizer.h | 112 ------- src/gallium/drivers/llvmpipe/lp_setup.c | 17 ++ 
src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 19 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 +- src/gallium/drivers/llvmpipe/lp_state_derived.c | 27 ++ 11 files changed, 691 insertions(+), 276 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_rast.c create mode 100644 src/gallium/drivers/llvmpipe/lp_rast.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rast_priv.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rast_tri.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.h diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index e244ac9087..8cccb2905b 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -138,7 +138,7 @@ lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct setup_context *setup_ctx = cvbr->setup; - llvmpipe_setup_prepare( setup_ctx ); + llvmpipe_update_state( setup_ctx->llvmpipe ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c new file mode 100644 index 0000000000..4771f821b3 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -0,0 +1,119 @@ + +struct lp_rasterizer *lp_rast_create( void ) +{ + return CALLOC_STRUCT(lp_rasterizer); +} + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil) +{ + pipe_surface_reference(&rast->state.color, color); + pipe_surface_reference(&rast->state.depth, depth); + rast->state.clear_color = util_pack_8888(clear_color); + rast->state.clear_depth = clear_depth * 0xffffffff; + rast->state.clear_stencil = clear_stencil; +} + +/* Begining of each tile: + */ +void 
lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ) +{ + rast->x = x; + rast->y = y; +} + +void lp_rast_clear_color( struct lp_rasterizer *rast ) +{ + const unsigned clear_color = rast->state.clear_color; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_color; +} + +void lp_rast_clear_depth( struct lp_rasterizer *rast ) +{ + const unsigned clear_depth = rast->state.clear_depth; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_depth; +} + +void lp_rast_clear_stencil( struct lp_rasterizer *rast ) +{ + const unsigned clear_stencil = rast->state.clear_stencil; + + memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); +} + +void lp_rast_load_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load colors from surface */ +} + +void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load depth (and stencil?) 
from surface */ +} + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *rast, + const struct lp_rast_state *state ) +{ + rast->shader_state = state; + lp->quad.first->begin( lp->quad.first ); + +} + + +void lp_rast_shade_tile( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = &inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Use the existing preference for 8x2 (four quads) shading: + */ + for (i = 0; i < TILESIZE; i += 8) { + for (j = 0; j < TILESIZE; j += 2) { + rast->shader_state.shade( inputs->jc, + rast->x + i, + rast->y + j, + rast->quads, 4 ); + } + } +} + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store colors to surface */ +} + +void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store depth/stencil to surface */ +} + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + FREE(rast); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h new file mode 100644 index 0000000000..8f4bd52c9e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -0,0 +1,129 @@ + +#ifndef LP_RAST_H +#define LP_RAST_H + +/* Initially create and program a single rasterizer directly. Later + * will want multiple of these, one or two per core. At that stage + * will probably pass command buffers into the rasterizers rather than + * individual function calls like this. + */ +struct lp_rasterizer; + +struct lp_rast_state { + /* State for the shader: + */ + struct lp_jit_context jc; + + /* The shader itself. 
Probably we also need to pass a pointer to + * the tile color/z/stencil data somehow: + */ + void (*run)( struct lp_jit_context *jc, + struct quad_header **quads, + unsigned nr ); +}; + +/* Coefficients necessary to run the shader at a given location: + */ +struct lp_rast_shader_inputs { + + /* Current rasterizer state: + */ + const struct lp_rast_state *state; + + /* Attribute interpolation: + */ + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + + +/* Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* State to run the shader: */ + struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( void ); + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil); + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ); + + + +union lp_rast_cmd_arg { + const struct lp_rast_shader_inputs *shade_tile; + const struct lp_rast_triangle *triangle; + const struct lp_rast_state *set_state; +}; + + +/* Binnable Commands: + */ +void lp_rast_clear_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_clear_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_load_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_load_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_set_state( 
struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_triangle( struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_store_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_store_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer * ); + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h new file mode 100644 index 0000000000..538ec22551 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -0,0 +1,31 @@ +#ifndef LP_RAST_PRIV_H +#define LP_RAST_PRIV_H + +#include "lp_rast.h" + +struct lp_rasterizer { + + /* We can choose whatever layout for the internal tile storage we + * prefer: + */ + struct { + unsigned color[TILESIZE][TILESIZE]; + unsigned depth[TILESIZE][TILESIZE]; + char stencil[TILESIZE][TILESIZE]; + } tile; + + + unsigned x; + unsigned y; + + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; +}; + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c new file mode 100644 index 0000000000..4b7b3719de --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -0,0 +1,348 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Rasterization for binned triangles within a tile + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_quad_pipe.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define BLOCKSIZE 4 + + +/* Convert 8x8 block into four runs of quads and render each in turn. 
+ */ +#if (BLOCKSIZE == 8) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int i; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + tri->quad[2].input.x0 = x + 4; + tri->quad[3].input.x0 = x + 6; + + for (i = 0; i < 4; i++, y += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + tri->quad[2].inout.mask = 0xf; + tri->quad[3].inout.mask = 0xf; + + tri->quad[0].input.y0 = y; + tri->quad[1].input.y0 = y; + tri->quad[2].input.y0 = y; + tri->quad[3].input.y0 = y; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + ptrs[2] = &tri->quad[2]; + ptrs[3] = &tri->quad[3]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); + } +} +#else +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + + for (iy = 0; iy < 4; iy += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + + tri->quad[0].input.y0 = y + iy; + tri->quad[1].input.y0 = y + iy; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); + } +} +#endif + +static void +do_quad( struct lp_rasterizer *rast, + int x, int y, + float c1, float c2, float c3 ) +{ + struct triangle *tri = rast->tri; + struct quad_header *quad = &rast->quad[0]; + + float xstep1 = -tri->dy12; + float xstep2 = -tri->dy23; + float xstep3 = -tri->dy31; + + float ystep1 = tri->dx12; + float ystep2 = tri->dx23; + float ystep3 = tri->dx31; + + quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = 0; + + if (c1 > 0 && + c2 > 0 && + c3 > 0) + quad->inout.mask |= 1; + + if (c1 + xstep1 > 0 && + c2 + xstep2 > 0 && + c3 + xstep3 > 0) + quad->inout.mask |= 2; + + if (c1 + ystep1 > 0 && + c2 + ystep2 > 0 && + c3 + ystep3 > 0) + 
quad->inout.mask |= 4; + + if (c1 + ystep1 + xstep1 > 0 && + c2 + ystep2 + xstep2 > 0 && + c3 + ystep3 + xstep3 > 0) + quad->inout.mask |= 8; + + if (quad->inout.mask) + rast->state->run( rast->state->state, &quad, 1 ); +} + +/* Evaluate each pixel in a block, generate a mask and possibly render + * the quad: + */ +static void +do_block( struct triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) +{ + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + int ix, iy; + + for (iy = 0; iy < BLOCKSIZE; iy += 2) { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (ix = 0; ix < BLOCKSIZE; ix += 2) { + + do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } +} + + + +/* Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle: + */ +void lp_rast_triangle( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri ) +{ + int minx, maxx, miny, maxy; + + /* Clamp to tile dimensions: + */ + minx = MAX2(tri->maxx, rast->x); + miny = MAX2(tri->miny, rast->y); + maxx = MIN2(tri->maxx, rast->x + TILESIZE); + maxy = MIN2(tri->maxy, rast->y + TILESIZE); + + if (miny == maxy || + minx == maxx) { + debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); + //assert(0); + return; + } + + /* Bind parameter interpolants: + */ + for (i = 0; i < Elements(rast->quad); i++) { + rast->quad[i].coef = tri->coef; + rast->quad[i].posCoef = &tri->position_coef; + } + + /* Small area? 
+ */ + if (miny + 16 > maxy && + minx + 16 > maxx) + { + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; + + int x, y; + + minx &= ~(step-1); + maxx &= ~(step-1); + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else + { + do_quad(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } + else + { + const int step = BLOCKSIZE; + + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; + + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + int x, y; + + minx &= ~(step-1); + miny &= ~(step-1); + + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full(&tri, x, y); /* trivial accept */ + } + else + { + do_block(&tri, x, y, cx1, cx2, cx3); + } + + 
/* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_rasterizer.c deleted file mode 100644 index 089ea59729..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_rasterizer.c +++ /dev/null @@ -1,157 +0,0 @@ - -struct lp_rasterizer { - - /* We can choose whatever layout for the internal tile storage we - * prefer: - */ - struct { - unsigned color[TILESIZE][TILESIZE]; - unsigned depth[TILESIZE][TILESIZE]; - char stencil[TILESIZE][TILESIZE]; - } tile; - - - unsigned x; - unsigned y; - - - struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; - unsigned clear_color; - unsigned clear_depth; - char clear_stencil; - } state; -}; - -struct lp_rasterizer *lp_rast_create( void ) -{ - return CALLOC_STRUCT(lp_rasterizer); -} - -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, - const float *clear_color, - double clear_depth, - unsigned clear_stencil) -{ - pipe_surface_reference(&rast->state.color, color); - pipe_surface_reference(&rast->state.depth, depth); - rast->state.clear_color = util_pack_8888(clear_color); - rast->state.clear_depth = clear_depth * 0xffffffff; - rast->state.clear_stencil = clear_stencil; -} - -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ) -{ - rast->x = x; - rast->y = y; -} - -void lp_rast_clear_color( struct lp_rasterizer *rast ) -{ - const unsigned clear_color = rast->state.clear_color; - unsigned i, j; - - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_color; -} - -void lp_rast_clear_depth( struct lp_rasterizer *rast ) -{ - const unsigned clear_depth = rast->state.clear_depth; - unsigned i, j; - - for (i = 0; i < TILESIZE; 
i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_depth; -} - -void lp_rast_clear_stencil( struct lp_rasterizer *rast ) -{ - const unsigned clear_stencil = rast->state.clear_stencil; - - memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); -} - -void lp_rast_load_color( struct lp_rasterizer *rast ) -{ - /* call u_tile func to load colors from surface */ -} - -void lp_rast_load_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to load depth (and stencil?) from surface */ -} - -/* Within a tile: - */ -void lp_rast_set_state( struct lp_rasterizer *rast, - const struct lp_rast_state *state ) -{ - rast->shader_state = state; -} - -void lp_rast_triangle( struct lp_rasterizer *rast, - const struct lp_rast_triangle *inputs ) -{ - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } - - /* Scan the tile in 4x4 chunks (?) and figure out which bits to - * rasterize: - */ - -} - -void lp_rast_shade_tile( struct lp_rasterizer *rast, - const struct lp_rast_shader_inputs *inputs ) -{ - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } - - /* Use the existing preference for 8x2 (four quads) shading: - */ - for (i = 0; i < TILESIZE; i += 8) { - for (j = 0; j < TILESIZE; j += 2) { - rast->shader_state.shade( inputs->jc, - rast->x + i, - rast->y + j, - rast->quads, 4 ); - } - } -} - -/* End of tile: - */ -void lp_rast_store_color( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store colors to surface */ -} - -void lp_rast_store_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store depth/stencil to surface */ -} - -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer *rast ) -{ - FREE(rast); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.h 
b/src/gallium/drivers/llvmpipe/lp_rasterizer.h deleted file mode 100644 index b3ae06a116..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_rasterizer.h +++ /dev/null @@ -1,112 +0,0 @@ - -/* Initially create and program a single rasterizer directly. Later - * will want multiple of these, one or two per core. At that stage - * will probably pass command buffers into the rasterizers rather than - * individual function calls like this. - */ -struct lp_rasterizer; - -struct lp_rast_state { - /* State: - */ - struct lp_jit_context jc; - - /* Shader itself: - */ -}; - -/* Coefficients necessary to run the shader at a given location: - */ -struct lp_rast_shader_inputs { - - /* Current rasterizer state: - */ - const struct lp_rast_state *state; - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; -}; - - -/* Rasterization information for a triangle known to be in this bin, - * plus inputs to run the shader: - */ -struct lp_rast_triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - /* State to run the shader: */ - struct lp_rast_shader_inputs inputs; -}; - - - -struct lp_rasterizer *lp_rast_create( void ); - -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, - const float *clear_color, - double clear_depth, - unsigned clear_stencil); - -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ); - -void lp_rast_clear_color( struct lp_rasterizer * ); - -void lp_rast_clear_zstencil( struct lp_rasterizer * ); - -void lp_rast_load_color( struct 
lp_rasterizer * ); - -void lp_rast_load_zstencil( struct lp_rasterizer * ); - - -/* Within a tile: - */ -void lp_rast_set_state( struct lp_rasterizer *, - const struct lp_rast_state * ); - -void lp_rast_triangle( struct lp_rasterizer *, - const struct lp_rast_triangle * ); - -void lp_rast_shade_tile( struct lp_rasterizer *, - const struct lp_rast_shader_inputs * ); - -/* End of tile: - */ -void lp_rast_store_color( struct lp_rasterizer * ); - -void lp_rast_store_zstencil( struct lp_rasterizer * ); - - -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer * ); - diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8c67524506..d6e51888b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -48,6 +48,23 @@ #define DEBUG_VERTS 0 + +void +llvmpipe_setup_flush() +{ +} + +void +llvmpipe_setup_bind_framebuffer() +{ +} + +void +llvmpipe_setup_clear() +{ +} + + /* Stubs for lines & points for now: */ void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c index 5b4faf489b..bb7a4feb39 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c @@ -1,7 +1,20 @@ void -rasterize( struct llvmpipe_context *llvmpipe, - struct binned_scene *scene ) +lp_setup_rasterize( struct llvmpipe_context *llvmpipe, + struct binned_scene *scene ) { - + lp_rast_bind_surfaces( rast, scene->framebuffer ); + + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + + lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); + + for (block = scene->tile[i][j].first; block; block = block->next) { + for (k = 0; k < block->nr_cmds; k++) { + block->cmd[k].func( rast, block->cmd[k].arg ); + } + } + } + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a09e0fa643..d43db7b123 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -728,9 +728,9 @@ static void triangle_nop( struct llvmpipe_context *llvmpipe, { } -/** - * Do setup for triangle rasterization, then render the triangle. - */ + + + void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) { llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 31eaadda21..fcd31136b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -232,6 +232,22 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; } +static void +update_culling() +{ + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = lp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ @@ -270,3 +286,14 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->dirty = 0; } + + +void llvmpipe_prepare( ) +{ + struct llvmpipe_context *lp = setup->llvmpipe; + + if (lp->dirty) { + llvmpipe_update_derived(lp); + } + +} -- cgit v1.2.3 From 5e13dfe6181952f0f538a77b8a9f91c1d7601ceb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 12:15:12 +0100 Subject: llvmpipe: whip out the intra-tile code from lp_setup_tri.c The "setup" module handles building per-tile display lists. 
Intra-tile rendering is handled by lp_rast*.c --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 326 +++------------------------- 1 file changed, 33 insertions(+), 293 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d43db7b123..98c87d551f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -26,12 +26,10 @@ **************************************************************************/ /* - * Recursive rasterization for triangles + * Binning code for triangles */ #include "lp_context.h" -#include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" @@ -42,43 +40,6 @@ #include "util/u_math.h" #include "util/u_memory.h" -#define BLOCKSIZE 4 - -struct triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - struct tgsi_interp_coef position_coef; - - /* A run of pre-initialized quads: - */ - struct llvmpipe_context *llvmpipe; - struct quad_header quad[4]; -}; - /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). @@ -267,163 +228,6 @@ static inline float subpixel_snap( float a ) } -/* Convert 8x8 block into four runs of quads and render each in turn. 
- */ -#if (BLOCKSIZE == 8) -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int i; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - tri->quad[2].input.x0 = x + 4; - tri->quad[3].input.x0 = x + 6; - - for (i = 0; i < 4; i++, y += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - tri->quad[2].inout.mask = 0xf; - tri->quad[3].inout.mask = 0xf; - - tri->quad[0].input.y0 = y; - tri->quad[1].input.y0 = y; - tri->quad[2].input.y0 = y; - tri->quad[3].input.y0 = y; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - ptrs[2] = &tri->quad[2]; - ptrs[3] = &tri->quad[3]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); - } -} -#elif (BLOCKSIZE == 4) -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int iy; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - - for (iy = 0; iy < 4; iy += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - - tri->quad[0].input.y0 = y + iy; - tri->quad[1].input.y0 = y + iy; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); - } -} -#else -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int iy; - - tri->quad[0].input.x0 = x; - tri->quad[0].input.y0 = y; - tri->quad[0].inout.mask = 0xf; - - ptrs[0] = &tri->quad[0]; - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 ); -} -#endif - - -static void -do_quad( struct triangle *tri, - int x, int y, - float c1, float c2, float c3 ) -{ - struct quad_header *quad = &tri->quad[0]; - - float xstep1 = -tri->dy12; - float xstep2 = -tri->dy23; - float xstep3 = -tri->dy31; - - float ystep1 = tri->dx12; - float ystep2 = tri->dx23; - float ystep3 = tri->dx31; - - 
quad->input.x0 = x; - quad->input.y0 = y; - quad->inout.mask = 0; - - if (c1 > 0 && - c2 > 0 && - c3 > 0) - quad->inout.mask |= 1; - - if (c1 + xstep1 > 0 && - c2 + xstep2 > 0 && - c3 + xstep3 > 0) - quad->inout.mask |= 2; - - if (c1 + ystep1 > 0 && - c2 + ystep2 > 0 && - c3 + ystep3 > 0) - quad->inout.mask |= 4; - - if (c1 + ystep1 + xstep1 > 0 && - c2 + ystep2 + xstep2 > 0 && - c3 + ystep3 + xstep3 > 0) - quad->inout.mask |= 8; - - if (quad->inout.mask) - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 ); -} - -/* Evaluate each pixel in a block, generate a mask and possibly render - * the quad: - */ -static void -do_block( struct triangle *tri, - int x, int y, - float c1, - float c2, - float c3 ) -{ - const int step = 2; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; - - int ix, iy; - - for (iy = 0; iy < BLOCKSIZE; iy += 2) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (ix = 0; ix < BLOCKSIZE; ix += 2) { - - do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } -} - @@ -441,14 +245,14 @@ do_block( struct triangle *tri, #define MAX3(a,b,c) MAX2(MAX2(a,b),c) static void -do_triangle_ccw(struct llvmpipe_context *llvmpipe, +do_triangle_ccw(struct lp_setup *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - const int rt_width = llvmpipe->framebuffer.cbufs[0]->width; - const int rt_height = llvmpipe->framebuffer.cbufs[0]->height; + const int rt_width = setup->framebuffer.cbufs[0]->width; + const int rt_height = setup->framebuffer.cbufs[0]->height; const float y1 = subpixel_snap(v1[0][1]); const float y2 = subpixel_snap(v2[0][1]); @@ -458,15 +262,12 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, const float x2 = 
subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle tri; + struct triangle *tri = allocate_triangle; float area; float c1, c2, c3; int i; int minx, maxx, miny, maxy; - tri.llvmpipe = llvmpipe; - - tri.dx12 = x1 - x2; tri.dx23 = x2 - x3; tri.dx31 = x3 - x1; @@ -505,12 +306,7 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, /* Setup parameter interpolants: */ - setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing ); - - for (i = 0; i < Elements(tri.quad); i++) { - tri.quad[i].coef = tri.coef; - tri.quad[i].posCoef = &tri.position_coef; - } + setup_tri_coefficients( setup, &tri, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. @@ -548,73 +344,22 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; - minx &= ~(BLOCKSIZE-1); /* aligned blocks */ - miny &= ~(BLOCKSIZE-1); /* aligned blocks */ + minx &= ~(TILESIZE-1); /* aligned blocks */ + miny &= ~(TILESIZE-1); /* aligned blocks */ c1 += tri.dx12 * miny - tri.dy12 * minx; c2 += tri.dx23 * miny - tri.dy23 * minx; c3 += tri.dx31 * miny - tri.dy31 * minx; - if ((miny & ~15) == (maxy & ~15) && - (minx & ~15) == (maxx & ~15)) + if (miny + TILESIZE > maxy && + minx + TILESIZE > maxx) { - const int step = 2; - - float xstep1 = -step * tri.dy12; - float xstep2 = -step * tri.dy23; - float xstep3 = -step * tri.dy31; - - float ystep1 = step * tri.dx12; - float ystep2 = step * tri.dx23; - float ystep3 = step * tri.dx31; - - float eo1 = tri.eo1 * step; - float eo2 = tri.eo2 * step; - float eo3 = tri.eo3 * step; - - int x, y; - - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in dimension. - * - * Trivially accept or reject blocks, else jump to per-pixel - * examination above. 
+ /* Triangle is contained in a single tile: */ - for (y = miny; y < maxy; y += step) - { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else - { - do_quad(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } } else { - const int step = BLOCKSIZE; + const int step = TILESIZE; float ei1 = tri.ei1 * step; float ei2 = tri.ei2 * step; @@ -645,7 +390,6 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, float cx1 = c1; float cx2 = c2; float cx3 = c3; - boolean in = false; for (x = minx; x < maxx; x += step) { @@ -654,21 +398,18 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, cx3 + eo3 < 0) { /* do nothing */ - if (in) - break; } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = TRUE; - block_full(&tri, x, y); /* trivial accept */ + /* shade whole tile */ + bin_command(tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { - in = TRUE; - // block_full(&tri, x, y); /* trivial accept */ - do_block(&tri, x, y, cx1, cx2, cx3); + /* shade partial tile */ + bin_command(tile[x][y], lp_rast_triangle, &tri ); } /* Iterate cx values across the region: @@ -687,23 +428,23 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, } } -static void triangle_cw( struct llvmpipe_context *llvmpipe, +static void triangle_cw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface ); + do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); } -static void triangle_ccw( struct llvmpipe_context *llvmpipe, +static void triangle_ccw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( llvmpipe, 
v0, v1, v2, llvmpipe->ccw_is_frontface ); + do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); } -static void triangle_both( struct llvmpipe_context *llvmpipe, +static void triangle_both( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -716,38 +457,37 @@ static void triangle_both( struct llvmpipe_context *llvmpipe, /* det = cross(e,f).z */ if (ex * fy - ey * fx < 0) - triangle_ccw( llvmpipe, v0, v1, v2 ); + triangle_ccw( setup, v0, v1, v2 ); else - triangle_cw( llvmpipe, v0, v1, v2 ); + triangle_cw( setup, v0, v1, v2 ); } -static void triangle_nop( struct llvmpipe_context *llvmpipe, +static void triangle_nop( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { } - - - -void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) +void setup_prepare_tri( struct setup_context *setup ) { - llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == - PIPE_WINDING_CW); + struct llvmpipe_context *llvmpipe = setup->llvmpipe; + + setup->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == + PIPE_WINDING_CW); switch (llvmpipe->rasterizer->cull_mode) { case PIPE_WINDING_NONE: - llvmpipe->triangle = triangle_both; + setup->triangle = triangle_both; break; case PIPE_WINDING_CCW: - llvmpipe->triangle = triangle_cw; + setup->triangle = triangle_cw; break; case PIPE_WINDING_CW: - llvmpipe->triangle = triangle_ccw; + setup->triangle = triangle_ccw; break; default: - llvmpipe->triangle = triangle_nop; + setup->triangle = triangle_nop; break; } } -- cgit v1.2.3 From a6676d896ed18426ed3d7e6340347974c1694ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 12:44:30 +0100 Subject: llvmpipe: Add the rast -> jit shader glue. Ugly code. Will eventually be reduced to a very thin inlined function. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 58 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 5 +-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 18 ++++++--- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4771f821b3..58ef108123 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -98,6 +98,64 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, } } + +void lp_rast_shade_quads( const struct lp_rast_state *state, + struct lp_rast_tile *tile, + struct quad_header **quads, + unsigned nr ) +{ + struct lp_fragment_shader *fs = llvmpipe->fs; + struct quad_header *quad = quads[0]; + const unsigned x = quad->input.x0; + const unsigned y = quad->input.y0; + uint8_t *color; + uint8_t *depth; + uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + unsigned chan_index; + unsigned q; + + /* Sanity checks */ + assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); + assert(x % TILE_VECTOR_WIDTH == 0); + assert(y % TILE_VECTOR_HEIGHT == 0); + for (q = 0; q < nr; ++q) { + assert(quads[q]->input.x0 == x + q*2); + assert(quads[q]->input.y0 == y); + } + + /* mask */ + for (q = 0; q < 4; ++q) + for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) + mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? 
~0 : 0; + + /* color buffer */ + color = &TILE_PIXEL(tile->color, x, y, 0); + + /* depth buffer */ + assert((x % 2) == 0); + assert((y % 2) == 0); + depth = (uint8_t)*tile->depth + y*TILE_SIZE*4 + 2*x*4; + + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); + + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(state->jc.blend_color, 16)); + + /* run shader */ + state->jit_function( &state->jc, + x, y, + quad->coef->a0, + quad->coef->dadx, + quad->coef->dady, + &mask[0][0], + color, + depth); + +} + + /* End of tile: */ void lp_rast_store_color( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 8f4bd52c9e..e417be935b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -17,9 +17,8 @@ struct lp_rast_state { /* The shader itself. Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: */ - void (*run)( struct lp_jit_context *jc, - struct quad_header **quads, - unsigned nr ); + lp_jit_frag_func shader; + }; /* Coefficients necessary to run the shader at a given location: diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 538ec22551..7eced38d67 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -3,16 +3,24 @@ #include "lp_rast.h" + +/* We can choose whatever layout for the internal tile storage we + * prefer: + */ +struct lp_rast_tile +{ + uint8_t *color; + + uint8_t *depth; +}; + + struct lp_rasterizer { /* We can choose whatever layout for the internal tile storage we * prefer: */ - struct { - unsigned color[TILESIZE][TILESIZE]; - unsigned depth[TILESIZE][TILESIZE]; - char stencil[TILESIZE][TILESIZE]; - } tile; + struct lp_rast_tile tile; unsigned x; -- cgit v1.2.3 From 
46df37ebfa83d7d06f4adebfbe201fed5bf2ecab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 12:44:52 +0100 Subject: llvmpipe: Update SConscript. --- src/gallium/drivers/llvmpipe/SConscript | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 344b246337..5e0fadc247 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -46,8 +46,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', + 'lp_query.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_setup.c', - 'lp_query.c', 'lp_screen.c', 'lp_state_blend.c', 'lp_state_clip.c', -- cgit v1.2.3 From d614ced756f2cca64ec83b122da4cd028c08c0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 12:51:46 +0100 Subject: llvmpipe: Update includes and copyright headers. --- src/gallium/drivers/llvmpipe/lp_rast.c | 33 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 26 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_setup.c | 1 - 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 58ef108123..df48ccce81 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -1,3 +1,36 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "lp_state.h" +#include "lp_quad.h" +#include "lp_rast.h" + struct lp_rasterizer *lp_rast_create( void ) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e417be935b..dadde2e863 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -1,3 +1,29 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef LP_RAST_H #define LP_RAST_H diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 4b7b3719de..40965d5f65 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d6e51888b9..ac9bfad3f2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -34,7 +34,6 @@ #include "lp_context.h" #include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" -- cgit v1.2.3 From 931210424bc46b2c13919f0ac3e0ef781eff207e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 15:44:29 +0100 Subject: llvmpipe: wip me harder --- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 84 +++--- src/gallium/drivers/llvmpipe/lp_rast.h | 11 + src/gallium/drivers/llvmpipe/lp_setup.c | 343 ++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_setup.h | 44 ++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 187 ++++++------ src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 20 -- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 151 +++++----- 7 files changed, 536 insertions(+), 304 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_rasterize.c diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 8cccb2905b..6c51d40a8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -171,14 +171,14 @@ 
lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -186,7 +186,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -194,12 +194,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[nr-1], stride), get_vert(vertex_buffer, indices[0], stride) ); } @@ -208,7 +208,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -216,7 +216,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -227,7 +227,7 
@@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -235,7 +235,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -246,7 +246,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); @@ -254,7 +254,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -265,11 +265,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], 
stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -277,12 +277,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -293,11 +293,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride)); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -305,11 +305,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -324,7 +324,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * flatshade_first state makes no difference. 
*/ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[0], stride) ); @@ -355,14 +355,14 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -370,7 +370,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -378,12 +378,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, nr-1, stride), get_vert(vertex_buffer, 0, stride) ); } @@ -392,7 +392,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); @@ -400,7 +400,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, 
stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -411,7 +411,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); @@ -419,7 +419,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -430,7 +430,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); @@ -438,7 +438,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -449,11 +449,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -461,11 +461,11 @@ 
lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -476,11 +476,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -488,11 +488,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -507,7 +507,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) * flatshade_first state makes no difference. 
*/ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride) ); @@ -525,7 +525,7 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - llvmpipe_setup_destroy_context(cvbr->setup); + lp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -556,7 +556,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) cvbr->llvmpipe = lp; - cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); + cvbr->setup = lp_setup_create_context(cvbr->llvmpipe); return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index dadde2e863..33a6065b89 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -90,6 +90,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs inputs; }; +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; struct lp_rasterizer *lp_rast_create( void ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ac9bfad3f2..514366b71f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,124 +26,337 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines) + * Tiling engine. * - * \author Keith Whitwell - * \author Brian Paul + * Builds per-tile display lists and executes them on calls to + * lp_setup_flush(). 
*/ -#include "lp_context.h" -#include "lp_quad.h" #include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +void lp_setup_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} -#define DEBUG_VERTS 0 +void lp_setup_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} +static void reset_context( struct setup_context *setup ) +{ + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { + struct cmd_block_list *list = scene->tile[i][j]; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } + } -void -llvmpipe_setup_flush() + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } +} + + + + +/* Add a command to all active bins. 
+ */ +static void bin_everywhere( struct setup_context *setup, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) { + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) + for (j = 0; j < setup->tiles_y; j++) + bin_cmd( setup, &setup->tile[i][j], cmd, arg ); } -void -llvmpipe_setup_bind_framebuffer() + +static void +rasterize_bins( struct setup_context *setup, + struct lp_rast *rast, + boolean write_depth ) { + lp_rast_bind_color( rast, + scene->fb.color, + TRUE ); /* WRITE */ + + lp_rast_bind_depth( rast, + scene->fb.depth, + write_depth ); /* WRITE */ + + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + + lp_rast_start_tile( rast, + i * TILESIZE, + j * TILESIZE ); + + for (block = scene->tile[i][j].first; block; block = block->next) { + for (k = 0; k < block->nr_cmds; k++) { + block->cmd[k].func( rast, block->cmd[k].arg ); + } + } + + lp_rast_finish_tile( rast ); + } + } + + lp_setup_free_data( setup ); } -void -llvmpipe_setup_clear() + + +static void +begin_binning( struct setup_context *setup ) { + if (setup->fb.color) { + if (setup->fb.clear_color) + bin_everywhere( setup, + lp_rast_clear_color, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_color, + NULL ); + } + + if (setup->fb.zstencil) { + if (setup->fb.clear_zstencil) + bin_everywhere( setup, + lp_rast_clear_zstencil, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_zstencil, + NULL ); + } } -/* Stubs for lines & points for now: +/* This basically bins and then flushes any outstanding full-screen + * clears. + * + * TODO: fast path for fullscreen clears and no triangles. 
*/ -void -llvmpipe_setup_point(struct setup_context *setup, - const float (*v0)[4]) +static void +execute_clears( struct setup_context *setup ) { + begin_binning( setup ); + rasterize_bins( setup ); } + +static void +set_state( struct setup_context *setup, + unsigned new_state ) +{ + unsigned old_state = setup->state; + + if (old_state == new_state) + return; + + switch (new_state) { + case SETUP_ACTIVE: + if (old_state == SETUP_FLUSHED) + setup_begin_binning( setup ); + break; + + case SETUP_CLEARED: + if (old_state == SETUP_ACTIVE) { + assert(0); + return; + } + break; + + case SETUP_FLUSHED: + if (old_state == SETUP_CLEAR) + execute_clears( setup ); + else + rasterize_bins( setup ); + break; + } + + setup->state = new_state; +} + + void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) +lp_setup_flush( struct setup_context *setup, + unsigned flags ) { + set_state( setup, SETUP_FLUSHED ); } -/* Called after statechange, before emitting primitives. If binning - * is active, this function should store relevant state in the binning - * context. - * - * That includes: - * - current fragment shader function - * - bound constant buffer contents - * - bound textures - * - blend color - * - etc. - * - * Basically everything needed at some point in the future to - * rasterize triangles for the current state. - * - * Additionally this will set up the state needed for the rasterizer - * to process and bin incoming triangles. That would include such - * things as: - * - cull mode - * - ??? - * - etc. 
- * - */ -void setup_prepare( struct setup_context *setup ) +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ) { - struct llvmpipe_context *lp = setup->llvmpipe; + unsigned width, height; - if (lp->dirty) { - llvmpipe_update_derived(lp); - } + set_state( setup, SETUP_FLUSHED ); + + pipe_surface_reference( &setup->fb.color, color ); + pipe_surface_reference( &setup->fb.zstencil, zstencil ); + + width = MAX2( color->width, zstencil->width ); + height = MAX2( color->height, zstencil->height ); + + setup->tiles_x = align( width, TILESIZE ) / TILESIZE; + setup->tiles_y = align( height, TILESIZE ) / TILESIZE; +} + +void +lp_setup_clear( struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags ) +{ + if (setup->state == SETUP_ACTIVE) { + struct lp_rast_clear_info *clear_info; + unsigned i, j; + + clear_info = alloc_clear_info( setup ); - lp->quad.first->begin( lp->quad.first ); + if (flags & PIPE_CLEAR_COLOR) { + pack_color( setup, + clear_info->color, + clear_color ); + bin_everywhere(setup, lp_rast_clear_color, clear_info ); + } - if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && - lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && - lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { - /* we'll do culling */ - setup->winding = lp->rasterizer->cull_mode; + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + pack_depth_stencil( setup, + clear_info->depth, + clear_depth, + clear_stencil ); + + bin_everywhere(setup, lp_rast_clear_zstencil, clear_info ); + } } else { - /* 'draw' will do culling */ - setup->winding = PIPE_WINDING_NONE; + set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; + + if (flags & PIPE_CLEAR_COLOR) { + memcpy(setup->clear.color, color, sizeof setup->clear.color); + } + + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + setup->clear.depth = clear_depth; + setup->clear.stencil = clear_stencil; + } } +} + + +void 
+lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ) +{ + memcpy( setup->interp, interp, nr * sizeof interp[0] ); +} - setup_prepare_tri( setup->llvmpipe ); + +static void +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + set_state( setup, STATE_ACTIVE ); + setup_choose_triangle( setup, v0, v1, v2 ); +} + + + +/* Stubs for lines & points for now: + */ +void +lp_setup_point(struct setup_context *setup, + const float (*v0)[4]) +{ + setup->point( setup, v0 ); } +void +lp_setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + setup->line( setup, v0, v1 ); +} + +void +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + setup->triangle( setup, v0, v1, v2 ); +} void setup_destroy_context( struct setup_context *setup ) { + lp_rast_destroy( setup->rast ); FREE( setup ); } /** - * Create a new primitive setup/render stage. + * Create a new primitive tiling engine. Currently also creates a + * rasterizer to use with it. 
*/ -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( void ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); - unsigned i; - setup->llvmpipe = llvmpipe; + setup->rast = lp_rast_create( void ); + if (!setup->rast) + goto fail; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) + setup->tile[i][j].first = + setup->tile[i][j].next = CALLOC_STRUCT(cmd_block); return setup; + +fail: + FREE(setup); + return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 05aaaf83b8..2542faad36 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,28 +27,46 @@ #ifndef LP_SETUP_H #define LP_SETUP_H + +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + struct setup_context; -struct llvmpipe_context; -/* Note, not using setup_context currently - */ +struct setup_context * +lp_setup_create( void ); void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]); +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ); - +lp_setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); +void +lp_setup_point( struct setup_context *setup, + const float (*v0)[4] ); -void setup_prepare( struct setup_context *setup ); +void +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cullmode, + boolean front_is_ccw ); -void setup_destroy_context( struct setup_context *setup ); +void +lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ); -void setup_prepare_tri( 
struct llvmpipe_context *llvmpipe ); +void +lp_setup_destroy( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 848705e099..91540d6751 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -27,114 +27,125 @@ #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; - -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; -/* Shade tile points directly at this: - */ -struct shader_inputs { - /* Some way of updating rasterizer state: - */ - /* ??? */ - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; -}; +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) -/* Shade triangle points at this: +/* switch to a non-pointer value for this: */ -struct shade_triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - struct shader_inputs inputs; -}; - -struct bin_cmd { - enum { - CMD_END = 0, - CMD_CLEAR, - CMD_LOAD_TILE, - CMD_SHADE_TILE, - CMD_SHADE_TRIANGLE, - } cmd; - - union { - struct triangle *tri; - struct clear *clear; - } ptr; -}; +typedef void (*lp_rast_cmd)( struct lp_rast *, const union lp_rast_cmd_arg * ); struct cmd_block { - struct bin_cmd cmds[128]; + union lp_rast_arg *arg[CMD_BLOCK_MAX]; + lp_rast_cmd cmd[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; -/* Triangles - */ struct data_block { - 
ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; - unsigned count; + ubyte data[DATA_BLOCK_SZ]; + unsigned used; struct data_block *next; }; -/* Need to store the state at the time the triangle was drawn, at - * least as it is needed during rasterization. That would include at - * minimum the constant values referred to by the fragment shader, - * blend state, etc. Much of this is code-generated into the shader - * in llvmpipe -- may be easier to do this work there. - */ -struct state_block { +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; }; +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + -/** - * Basically all the data from a binner scene: +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. */ -struct binned_scene { - struct llvmpipe_context *llvmpipe; +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 + +struct setup_context { + + /* When there are multiple threads, will want to double-buffer the + * bin arrays: + */ + struct cmd_block_list bin[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct data_block_list data; + + unsigned tiles_x; + unsigned tiles_y; - struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; - struct data_block *data; + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + } fb; + + struct { + unsigned flags; + float clear_color[4]; + double clear_depth; + unsigned clear_stencil; + } clear; + + enum { + SETUP_FLUSHED, + SETUP_CLEARED, + SETUP_ACTIVE + } state; + + struct { + enum lp_interp inputs[PIPE_MAX_ATTRIBS]; + unsigned nr_inputs; + } fs; + + void (*point)( struct setup_context *, + const float (*v0)[4]); + + void (*line)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4]); + + void (*triangle)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); }; -static INLINE struct triangle 
*get_triangle( struct setup_context *setup ) +static INLINE void *get_data( struct data_block_list *list, + unsigned size) { - if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) - return setup_triangle_from_new_block( setup ); - return &setup->triangles[setup->triangles->count++]; + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_setup_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + char *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + +/* Add a command to a given bin. + */ +static INLINE void bin_cmd( struct cmd_block_list *list, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) +{ + if (list->tail.count == CMD_BLOCK_MAX) { + lp_setup_new_cmd_block( list ) + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } } + + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c deleted file mode 100644 index bb7a4feb39..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c +++ /dev/null @@ -1,20 +0,0 @@ - -void -lp_setup_rasterize( struct llvmpipe_context *llvmpipe, - struct binned_scene *scene ) -{ - lp_rast_bind_surfaces( rast, scene->framebuffer ); - - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - - lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); - - for (block = scene->tile[i][j].first; block; block = block->next) { - for (k = 0; k < block->nr_cmds; k++) { - block->cmd[k].func( rast, block->cmd[k].arg ); - } - } - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 98c87d551f..75a0ea8888 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,14 +29,8 @@ * Binning code for triangles */ -#include "lp_context.h" #include "lp_setup.h" #include "lp_state.h" -#include 
"draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -163,56 +157,55 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, +static void setup_tri_coefficients( struct setup_context *setup, struct triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct lp_fragment_shader *fs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (input = 0; input < fs->info.num_inputs; input++) { + for (input = 0; input < vinfo->num_fs_inputs; input++) { unsigned vert_attr = vinfo->attrib[input].src_index; unsigned i; switch (vinfo->attrib[input].interp_mode) { case INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(&tri->coef[input], v3, vert_attr, i); + constant_coef(tri->coef[input], v3, vert_attr, i); break; case INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_POS: setup_fragcoord_coef(tri, input); break; - default: - assert(0); 
- } - - if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + case INTERP_FACING: tri->coef[input].a0[0] = 1.0f - frontface; tri->coef[input].dadx[0] = 0.0; tri->coef[input].dady[0] = 0.0; + break; + + default: + assert(0); } } } @@ -262,22 +255,22 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle; + struct triangle *tri = allocate_triangle( setup ); float area; float c1, c2, c3; int i; int minx, maxx, miny, maxy; - tri.dx12 = x1 - x2; - tri.dx23 = x2 - x3; - tri.dx31 = x3 - x1; + tri->dx12 = x1 - x2; + tri->dx23 = x2 - x3; + tri->dx31 = x3 - x1; - tri.dy12 = y1 - y2; - tri.dy23 = y2 - y3; - tri.dy31 = y3 - y1; + tri->dy12 = y1 - y2; + tri->dy23 = y2 - y3; + tri->dy31 = y3 - y1; - area = (tri.dx12 * tri.dy31 - - tri.dx31 * tri.dy12); + area = (tri->dx12 * tri->dy31 - + tri->dx31 * tri->dy12); /* Cull non-ccw and zero-sized triangles. */ @@ -302,80 +295,87 @@ do_triangle_ccw(struct lp_setup *setup, /* The only divide in this code. Is it really needed? */ - tri.oneoverarea = 1.0f / area; + tri->oneoverarea = 1.0f / area; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, &tri, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. 
*/ - c1 = tri.dy12 * x1 - tri.dx12 * y1; - c2 = tri.dy23 * x2 - tri.dx23 * y2; - c3 = tri.dy31 * x3 - tri.dx31 * y3; + c1 = tri->dy12 * x1 - tri->dx12 * y1; + c2 = tri->dy23 * x2 - tri->dx23 * y2; + c3 = tri->dy31 * x3 - tri->dx31 * y3; /* correct for top-left fill convention: */ - if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; - if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; - if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - tri.eo1 = 0; - if (tri.dy12 < 0) tri.eo1 -= tri.dy12; - if (tri.dx12 > 0) tri.eo1 += tri.dx12; + tri->eo1 = 0; + if (tri->dy12 < 0) tri->eo1 -= tri->dy12; + if (tri->dx12 > 0) tri->eo1 += tri->dx12; - tri.eo2 = 0; - if (tri.dy23 < 0) tri.eo2 -= tri.dy23; - if (tri.dx23 > 0) tri.eo2 += tri.dx23; + tri->eo2 = 0; + if (tri->dy23 < 0) tri->eo2 -= tri->dy23; + if (tri->dx23 > 0) tri->eo2 += tri->dx23; - tri.eo3 = 0; - if (tri.dy31 < 0) tri.eo3 -= tri.dy31; - if (tri.dx31 > 0) tri.eo3 += tri.dx31; + tri->eo3 = 0; + if (tri->dy31 < 0) tri->eo3 -= tri->dy31; + if (tri->dx31 > 0) tri->eo3 += tri->dx31; /* Calculate trivial accept offsets from the above. 
*/ - tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; - tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; - tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; + tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; + tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; minx &= ~(TILESIZE-1); /* aligned blocks */ miny &= ~(TILESIZE-1); /* aligned blocks */ - c1 += tri.dx12 * miny - tri.dy12 * minx; - c2 += tri.dx23 * miny - tri.dy23 * minx; - c3 += tri.dx31 * miny - tri.dy31 * minx; + c1 += tri->dx12 * miny - tri->dy12 * minx; + c2 += tri->dx23 * miny - tri->dy23 * minx; + c3 += tri->dx31 * miny - tri->dy31 * minx; - if (miny + TILESIZE > maxy && - minx + TILESIZE > maxx) + /* Convert to tile coordinates: + */ + minx /= TILESIZE; + maxx /= TILESIZE; + miny /= TILESIZE; + maxy /= TILESIZE; + + if (miny == maxy && minx == maxx) { /* Triangle is contained in a single tile: */ + bin_command(setup->tile[minx][miny], lp_rast_triangle, tri ); } else { const int step = TILESIZE; - float ei1 = tri.ei1 * step; - float ei2 = tri.ei2 * step; - float ei3 = tri.ei3 * step; + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; - float eo1 = tri.eo1 * step; - float eo2 = tri.eo2 * step; - float eo3 = tri.eo3 * step; + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; - float xstep1 = -step * tri.dy12; - float xstep2 = -step * tri.dy23; - float xstep3 = -step * tri.dy31; + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; - float ystep1 = step * tri.dx12; - float ystep2 = step * tri.dx23; - float ystep3 = step * tri.dx31; + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; int x, y; @@ -385,13 +385,13 @@ do_triangle_ccw(struct lp_setup *setup, * Trivially accept or reject blocks, else jump to per-pixel * examination above. 
*/ - for (y = miny; y < maxy; y += step) + for (y = miny; y < maxy; y++) { float cx1 = c1; float cx2 = c2; float cx3 = c3; - for (x = minx; x < maxx; x += step) + for (x = minx; x < maxx; x++) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || @@ -404,12 +404,12 @@ do_triangle_ccw(struct lp_setup *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command(tile[x][y], lp_rast_shade_tile, &tri->inputs ); + bin_command(setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { /* shade partial tile */ - bin_command(tile[x][y], lp_rast_triangle, &tri ); + bin_command(setup->tile[x][y], lp_rast_triangle, tri ); } /* Iterate cx values across the region: @@ -469,14 +469,13 @@ static void triangle_nop( struct setup_context *setup, { } -void setup_prepare_tri( struct setup_context *setup ) +void setup_set_tri_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) { - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - - setup->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == - PIPE_WINDING_CW); + setup->ccw_is_frontface = ccw_is_frontface; - switch (llvmpipe->rasterizer->cull_mode) { + switch (cull_mode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From d2e2b75633b5ac8eef20fd3c6846d871a6d7eb1a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 16:32:03 +0100 Subject: llvmpipe: rewrite llvmpipe_clear() --- src/gallium/drivers/llvmpipe/lp_clear.c | 36 +++++++-------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index bdcff94b9b..9efb3d4083 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -33,12 +33,10 @@ #include "pipe/p_defines.h" -#include "util/u_pack_color.h" #include "lp_clear.h" #include "lp_context.h" -#include "lp_surface.h" +#include "lp_setup.h" #include "lp_state.h" -#include "lp_tile_cache.h" /** @@ -46,36 
+44,16 @@ * No masking, no scissor (clear entire buffer). */ void -llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +llvmpipe_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned cv; - uint i; if (llvmpipe->no_rast) return; -#if 0 - llvmpipe_update_derived(llvmpipe); /* not needed?? */ -#endif - - if (buffers & PIPE_CLEAR_COLOR) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i]; - - util_pack_color(rgba, ps->format, &cv); - lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); - } - llvmpipe->dirty_render_cache = TRUE; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf; - - cv = util_pack_z_stencil(ps->format, depth, stencil); - - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); - } + lp_setup_clear( llvmpipe->setup, buffers, rgba, depth, stencil ); } -- cgit v1.2.3 From 6b65685def525a8023ee936e82e53af2bc4e38b2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 16:33:12 +0100 Subject: llvmpipe: remove tex tile cache and related code --- src/gallium/drivers/llvmpipe/lp_context.c | 70 +- src/gallium/drivers/llvmpipe/lp_context.h | 21 +- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 3 - src/gallium/drivers/llvmpipe/lp_flush.c | 32 +- src/gallium/drivers/llvmpipe/lp_setup.h | 9 +- src/gallium/drivers/llvmpipe/lp_state.h | 6 - src/gallium/drivers/llvmpipe/lp_state_derived.c | 7 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 2 - src/gallium/drivers/llvmpipe/lp_state_surface.c | 41 +- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 304 ---- src/gallium/drivers/llvmpipe/lp_tex_cache.h | 151 -- src/gallium/drivers/llvmpipe/lp_tex_sample.h | 3 - src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 1713 
----------------------- src/gallium/drivers/llvmpipe/lp_texture.c | 4 +- 14 files changed, 33 insertions(+), 2333 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 57e71f3e98..f087b65321 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -41,62 +41,12 @@ #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" -#include "lp_tile_cache.h" -#include "lp_tex_cache.h" #include "lp_texture.h" #include "lp_winsys.h" #include "lp_query.h" -/** - * Map any drawing surfaces which aren't already mapped - */ -void -llvmpipe_map_transfers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - struct pipe_surface *zsbuf = lp->framebuffer.zsbuf; - unsigned i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - } - - if(zsbuf) { - if(!lp->zsbuf_transfer) - lp->zsbuf_transfer = screen->get_tex_transfer(screen, zsbuf->texture, - zsbuf->face, zsbuf->level, zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, zsbuf->width, zsbuf->height); - if(lp->zsbuf_transfer && !lp->zsbuf_map) - lp->zsbuf_map = screen->transfer_map(screen, lp->zsbuf_transfer); - - } -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -llvmpipe_unmap_transfers(struct llvmpipe_context *lp) -{ - uint i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]); - } - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = lp->pipe.screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - } -} static void llvmpipe_destroy( struct pipe_context *pipe ) @@ -107,14 +57,16 @@ static void llvmpipe_destroy( 
struct pipe_context *pipe ) if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); + if (llvmpipe->setup) + lp_setup_destroy( llvmpipe->setup ); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); pipe_texture_reference(&llvmpipe->texture[i], NULL); } @@ -135,7 +87,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; - if(llvmpipe->dirty_render_cache) { + if (lp_setup_is_active(llvmpipe->setup)) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && llvmpipe->framebuffer.cbufs[i]->texture == texture) @@ -226,21 +178,10 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_texture_funcs( llvmpipe ); - /* - * Alloc caches for accessing drawing surfaces and textures. 
- */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen ); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen ); - - /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i]; llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; } @@ -248,7 +189,6 @@ llvmpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; } diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 0b77ae58d5..17e8897546 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -42,12 +42,10 @@ struct llvmpipe_vbuf_render; struct draw_context; struct draw_stage; -struct llvmpipe_tile_cache; -struct llvmpipe_tex_tile_cache; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; - +struct lp_setup_context; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -112,8 +110,6 @@ struct llvmpipe_context { /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; - unsigned line_stipple_counter; - /** TGSI exec things */ struct { struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; @@ -122,6 +118,9 @@ struct llvmpipe_context { struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; + /** The tiling engine */ + struct lp_setup_context *setup; + /** The primitive drawing context */ struct 
draw_context *draw; @@ -129,18 +128,8 @@ struct llvmpipe_context { struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - boolean dirty_render_cache; - - struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; - - /* TODO: we shouldn't be using external interfaces internally like this */ - struct pipe_transfer *zsbuf_transfer; - uint8_t *zsbuf_map; - unsigned tex_timestamp; - struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; - - unsigned no_rast : 1; + boolean no_rast; struct lp_jit_context jit_context; }; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 89772e62d3..2bffcdb3ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -123,7 +123,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, if (lp->dirty) llvmpipe_update_derived( lp ); - llvmpipe_map_transfers(lp); llvmpipe_map_constant_buffers(lp); /* @@ -164,8 +163,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ llvmpipe_unmap_constant_buffers(lp); - - lp->dirty_render_cache = TRUE; return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index cd8381fe30..d0dd41f09c 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -36,8 +36,6 @@ #include "lp_context.h" #include "lp_surface.h" #include "lp_state.h" -#include "lp_tile_cache.h" -#include "lp_tex_cache.h" #include "lp_winsys.h" @@ -47,40 +45,14 @@ llvmpipe_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - uint i; draw_flush(llvmpipe->draw); if (flags & PIPE_FLUSH_SWAPBUFFERS) { - /* If this is a swapbuffers, just flush color buffers. - * - * The zbuffer changes are not discarded, but held in the cache - * in the hope that a later clear will wipe them out. 
- */ - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } - - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - llvmpipe_unmap_transfers(llvmpipe); + lp_setup_flush( llvmpipe->setup, FALSE ); } else if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } - - /* FIXME: untile zsbuf! */ - - llvmpipe->dirty_render_cache = FALSE; + lp_setup_flush( llvmpipe->setup, TRUE ); } /* Enable to dump BMPs of the color/depth buffers each frame */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 2542faad36..39e7b558c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -41,11 +41,18 @@ struct setup_context; struct setup_context * lp_setup_create( void ); +void +lp_setup_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil); + void lp_setup_triangle(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], - const float (*v1)[4]); + const float (*v2)[4]); void lp_setup_line(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7b26ce61a3..a9980d6f14 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -207,12 +207,6 @@ void llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); -void -llvmpipe_map_transfers(struct llvmpipe_context *lp); - -void 
-llvmpipe_unmap_transfers(struct llvmpipe_context *lp); - void llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index fcd31136b7..c9439c7154 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -33,7 +33,6 @@ #include "draw/draw_private.h" #include "lp_context.h" #include "lp_screen.h" -#include "lp_tex_cache.h" #include "lp_state.h" @@ -215,7 +214,7 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; + llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; } /* fragment shader samplers */ @@ -225,10 +224,6 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; } - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723..ae787801eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -37,7 +37,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tex_cache.h" #include "draw/draw_context.h" @@ -97,7 +96,6 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? 
texture[i] : NULL; pipe_texture_reference(&llvmpipe->texture[i], tex); - lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index c06ce8b75c..06560335d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -31,16 +31,12 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_surface.h" -#include "lp_tile_cache.h" #include "draw/draw_context.h" /** - * XXX this might get moved someday * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. - * Here, we flush the old surfaces and update the tile cache to point to the new - * surfaces. */ void llvmpipe_set_framebuffer_state(struct pipe_context *pipe, @@ -48,38 +44,23 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, { struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + boolean dirty = FALSE; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { - /* flush old */ - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - lp_flush_tile_cache(lp->cbuf_cache[i]); - - /* assign new */ pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); - - /* update cache */ - lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); + dirty = TRUE; } } - lp->framebuffer.nr_cbufs = fb->nr_cbufs; + if (lp->framebuffer.nr_cbufs != fb->nr_cbufs) { + dirty = TRUE; + lp->framebuffer.nr_cbufs = fb->nr_cbufs; + } /* zbuf changing? 
*/ if (lp->framebuffer.zsbuf != fb->zsbuf) { - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = pipe->screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - - screen->tex_transfer_destroy(lp->zsbuf_transfer); - lp->zsbuf_transfer = NULL; - } + dirty = TRUE; /* assign new */ pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); @@ -100,8 +81,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } } - lp->framebuffer.width = fb->width; - lp->framebuffer.height = fb->height; - - lp->dirty |= LP_NEW_FRAMEBUFFER; + if (dirty) { + lp_setup_set_framebuffer( llvmpipe->setup, fb ); + lp->dirty |= LP_NEW_FRAMEBUFFER; + } } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c deleted file mode 100644 index 773e848242..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ /dev/null @@ -1,304 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. - * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_tile.h" -#include "util/u_format.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_cache.h" - - - -/** - * Return the position in the cache for the tile that contains win pos (x,y). - * We currently use a direct mapped cache so this is like a hack key. - * At some point we should investige something more sophisticated, like - * a LRU replacement policy. - */ -#define CACHE_POS(x, y) \ - (((x) + (y) * 5) % NUM_ENTRIES) - - - -/** - * Is the tile at (x,y) in cleared state? - */ -static INLINE uint -is_clear_flag_set(const uint *bitvec, union tex_tile_address addr) -{ - int pos, bit; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bit = bitvec[pos / 32] & (1 << (pos & 31)); - return bit; -} - - -/** - * Mark the tile at (x,y) as not cleared. 
- */ -static INLINE void -clear_clear_flag(uint *bitvec, union tex_tile_address addr) -{ - int pos; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bitvec[pos / 32] &= ~(1 << (pos & 31)); -} - - -struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tex_tile_cache *tc; - uint pos; - - tc = CALLOC_STRUCT( llvmpipe_tex_tile_cache ); - if (tc) { - tc->screen = screen; - for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; - } - tc->last_tile = &tc->entries[0]; /* any tile */ - } - return tc; -} - - -void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc) -{ - struct pipe_screen *screen; - uint pos; - - for (pos = 0; pos < NUM_ENTRIES; pos++) { - /*assert(tc->entries[pos].x < 0);*/ - } - if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); - } - if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); - } - - FREE( tc ); -} - - -void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); - - if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); -} - - -void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } - - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } -} - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->texture) { - struct llvmpipe_texture *lpt = llvmpipe_texture(tc->texture); - if (lpt->timestamp != 
tc->timestamp) { - /* texture was modified, invalidate all cached tiles */ - uint i; - debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->timestamp = lpt->timestamp; - } - } -} - -/** - * Specify the texture to cache. - */ -void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture) -{ - uint i; - - assert(!tc->transfer); - - if (tc->texture != texture) { - pipe_texture_reference(&tc->texture, texture); - - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->tex_face = -1; /* any invalid value here */ - } -} - - -/** - * Given the texture face, level, zslice, x and y values, compute - * the cache entry position/index where we'd hope to find the - * cached texture tile. - * This is basically a direct-map cache. - * XXX There's probably lots of ways in which we can improve this. - */ -static INLINE uint -tex_cache_pos( union tex_tile_address addr ) -{ - uint entry = (addr.bits.x + - addr.bits.y * 9 + - addr.bits.z * 3 + - addr.bits.face + - addr.bits.level * 7); - - return entry % NUM_ENTRIES; -} - -/** - * Similar to lp_get_cached_tile() but for textures. - * Tiles are read-only and indexed with more params. 
- */ -const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - struct pipe_screen *screen = tc->screen; - struct llvmpipe_cached_tex_tile *tile; - - tile = tc->entries + tex_cache_pos( addr ); - - if (addr.value != tile->addr.value) { - - /* cache miss. Most misses are because we've invaldiated the - * texture cache previously -- most commonly on binding a new - * texture. Currently we effectively flush the cache on texture - * bind. - */ -#if 0 - _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" - " tile %u: x=%d y=%d z=%d face=%d level=%d\n", - pos, x/TEX_TILE_SIZE, y/TEX_TILE_SIZE, z, face, level, - pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); -#endif - - /* check if we need to get a new transfer */ - if (!tc->tex_trans || - tc->tex_face != addr.bits.face || - tc->tex_level != addr.bits.level || - tc->tex_z != addr.bits.z) { - /* get new transfer (view into texture) */ - - if (tc->tex_trans) { - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, - addr.bits.face, - addr.bits.level, - addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - - tc->tex_face = addr.bits.face; - tc->tex_level = addr.bits.level; - tc->tex_z = addr.bits.z; - } - - { - unsigned x = addr.bits.x * TEX_TILE_SIZE; - unsigned y = addr.bits.y * TEX_TILE_SIZE; - unsigned w = TEX_TILE_SIZE; - unsigned h = TEX_TILE_SIZE; - - if (pipe_clip_tile(x, y, &w, &h, tc->tex_trans)) { - assert(0); - } - - util_format_read_4ub(tc->tex_trans->format, - (uint8_t *)tile->color, sizeof tile->color[0], - tc->tex_trans_map, tc->tex_trans->stride, - x, 
y, w, h); - } - - tile->addr = addr; - } - - tc->last_tile = tile; - return tile; -} diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h deleted file mode 100644 index 9fa6c36812..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TEX_CACHE_H -#define LP_TEX_CACHE_H - - -#include "pipe/p_compiler.h" - - -struct llvmpipe_context; -struct llvmpipe_tex_tile_cache; - - -/** - * Cache tile size (width and height). This needs to be a power of two. 
- */ -#define TEX_TILE_SIZE 64 - - -/* If we need to support > 4096, just expand this to be a 64 bit - * union, or consider tiling in Z as well. - */ -union tex_tile_address { - struct { - unsigned x:6; /* 4096 / TEX_TILE_SIZE */ - unsigned y:6; /* 4096 / TEX_TILE_SIZE */ - unsigned z:12; /* 4096 -- z not tiled */ - unsigned face:3; - unsigned level:4; - unsigned invalid:1; - } bits; - unsigned value; -}; - - -struct llvmpipe_cached_tex_tile -{ - union tex_tile_address addr; - uint8_t color[TEX_TILE_SIZE][TEX_TILE_SIZE][4]; -}; - -#define NUM_ENTRIES 50 - - -/** XXX move these */ -#define MAX_TEX_WIDTH 2048 -#define MAX_TEX_HEIGHT 2048 - - -struct llvmpipe_tex_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct pipe_texture *texture; /**< if caching a texture */ - unsigned timestamp; - - struct llvmpipe_cached_tex_tile entries[NUM_ENTRIES]; - - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - - struct llvmpipe_cached_tex_tile *last_tile; /**< most recently retrieved tile */ -}; - - -extern struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture); - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc); - -extern const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ); - -static INLINE const union tex_tile_address -tex_tile_address( unsigned x, - unsigned y, - unsigned z, - unsigned face, - unsigned level ) -{ - 
union tex_tile_address addr; - - addr.value = 0; - addr.bits.x = x / TEX_TILE_SIZE; - addr.bits.y = y / TEX_TILE_SIZE; - addr.bits.z = z; - addr.bits.face = face; - addr.bits.level = level; - - return addr; -} - -/* Quickly retrieve tile if it matches last lookup. - */ -static INLINE const struct llvmpipe_cached_tex_tile * -lp_get_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - if (tc->last_tile->addr.value == addr.value) - return tc->last_tile; - - return lp_find_cached_tex_tile( tc, addr ); -} - - -#endif /* LP_TEX_CACHE_H */ - diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde956..526ea100db 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -34,7 +34,6 @@ #include "tgsi/tgsi_exec.h" -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; @@ -55,8 +54,6 @@ struct lp_shader_sampler const struct pipe_texture *texture; const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; }; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index a1365a045f..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). 
- */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: 
- for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; 
- w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. 
- */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = 
PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. 
Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. 
- */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... 
- */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? 
y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. 
- * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. 
four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j 
= 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; 
- unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... 
- */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, 
tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void 
-lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], 
y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], 
tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], 
x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? 
*/ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case 
PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - 
LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 08f0950d47..1682e37354 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,7 +40,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tex_cache.h" #include "lp_screen.h" #include "lp_winsys.h" @@ -241,7 +240,7 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage & 
(PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* Mark the surface as dirty. The tile cache will look for this. */ + /* Mark the surface as dirty. */ lpt->timestamp++; llvmpipe_screen(screen)->timestamp++; } @@ -368,7 +367,6 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. - * In llvmpipe, that would mean flushing the texture cache. */ screen->timestamp++; } -- cgit v1.2.3 From ee3383bc5ab81ff12d9faa675c1c38683300ce68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:06:03 +0100 Subject: llvmpipe: Update more copyright headers. --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 27 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7eced38d67..b819519553 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef LP_RAST_PRIV_H #define LP_RAST_PRIV_H diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 91540d6751..eeee7159d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,12 +18,13 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* **************************************************************************/ + #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -- cgit v1.2.3 From ce7ac8e7439fba74fc1ee368559dd520a2d1eabe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:06:32 +0100 Subject: llvmpipe: Update lp_setup_clear invocation. --- src/gallium/drivers/llvmpipe/lp_clear.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 9efb3d4083..4bae44e2ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -55,5 +55,5 @@ llvmpipe_clear(struct pipe_context *pipe, if (llvmpipe->no_rast) return; - lp_setup_clear( llvmpipe->setup, buffers, rgba, depth, stencil ); + lp_setup_clear( llvmpipe->setup, rgba, depth, stencil, buffers ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 39e7b558c8..6d741f7271 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,6 +27,7 @@ #ifndef LP_SETUP_H #define LP_SETUP_H +#include "pipe/p_compiler.h" enum lp_interp { LP_INTERP_CONSTANT, @@ -42,11 +43,11 @@ struct setup_context * lp_setup_create( void ); void -lp_setup_clear(struct pipe_context *pipe, - unsigned buffers, - const float *rgba, - double depth, - unsigned stencil); +lp_setup_clear(struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags); void lp_setup_triangle(struct setup_context *setup, -- cgit v1.2.3 From 5974b80380de1a2fcaf71c638a8a11973379529d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:06:49 +0100 Subject: llvmpipe: Remove dead files from SConscript. 
--- src/gallium/drivers/llvmpipe/SConscript | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5e0fadc247..b39bc76da0 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -61,8 +61,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', -- cgit v1.2.3 From 921584181eb2f3b2849d150295dfce1dae25dd11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:26:13 +0100 Subject: llvmpipe: Fix up lp_rast_shade_quads. --- src/gallium/drivers/llvmpipe/lp_jit.h | 4 ++-- src/gallium/drivers/llvmpipe/lp_rast.c | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede2..643e85be20 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -108,13 +108,13 @@ struct lp_jit_context typedef void -(*lp_jit_frag_func)(struct lp_jit_context *context, +(*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, const void *a0, const void *dadx, const void *dady, - uint32_t *mask, + const uint32_t *mask, void *color, void *depth); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index df48ccce81..e3d1cd56e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -30,6 +30,9 @@ #include "lp_state.h" #include "lp_quad.h" #include "lp_rast.h" +#include "lp_rast_priv.h" +#include "lp_tile_soa.h" +#include "lp_bld_debug.h" struct lp_rasterizer *lp_rast_create( void ) @@ -137,7 +140,6 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, struct quad_header **quads, unsigned nr ) { - struct lp_fragment_shader 
*fs = llvmpipe->fs; struct quad_header *quad = quads[0]; const unsigned x = quad->input.x0; const unsigned y = quad->input.y0; @@ -167,7 +169,7 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = (uint8_t)*tile->depth + y*TILE_SIZE*4 + 2*x*4; + depth = (uint8_t *)tile->depth + y*TILE_SIZE*4 + 2*x*4; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -177,14 +179,14 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, assert(lp_check_alignment(state->jc.blend_color, 16)); /* run shader */ - state->jit_function( &state->jc, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); + state->shader( &state->jc, + x, y, + quad->coef->a0, + quad->coef->dadx, + quad->coef->dady, + &mask[0][0], + color, + depth); } -- cgit v1.2.3 From d0c918b87a9fb0e86d6b3efedf3ef505e04c527f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 17:20:40 +0100 Subject: llvmpipe: remove some old sampler support structs --- src/gallium/drivers/llvmpipe/lp_context.c | 40 +++++-------------------- src/gallium/drivers/llvmpipe/lp_context.h | 12 ++------ src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 11 +++---- src/gallium/drivers/llvmpipe/lp_rast.c | 44 +++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup.h | 13 +++++--- src/gallium/drivers/llvmpipe/lp_tex_sample.h | 28 ------------------ 8 files changed, 45 insertions(+), 107 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f087b65321..7f7b04412c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -44,6 +44,7 @@ #include "lp_texture.h" #include "lp_winsys.h" #include "lp_query.h" +#include "lp_setup.h" @@ 
-85,20 +86,8 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - unsigned i; - if (lp_setup_is_active(llvmpipe->setup)) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - if(llvmpipe->framebuffer.cbufs[i] && - llvmpipe->framebuffer.cbufs[i]->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - if(llvmpipe->framebuffer.zsbuf && - llvmpipe->framebuffer.zsbuf->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - - return PIPE_UNREFERENCED; + return lp_setup_is_texture_referenced(llvmpipe->setup, texture); } static unsigned int @@ -112,7 +101,6 @@ struct pipe_context * llvmpipe_create( struct pipe_screen *screen ) { struct llvmpipe_context *llvmpipe; - uint i; llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); if (!llvmpipe) @@ -178,20 +166,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_texture_funcs( llvmpipe ); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. 
*/ @@ -199,14 +173,16 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: vertex sampler state + */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; + llvmpipe->setup = lp_setup_create(); + if (!llvmpipe->setup) + goto fail; + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); if (!llvmpipe->vbuf_backend) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 17e8897546..852f7a1d05 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -45,7 +45,7 @@ struct draw_stage; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; -struct lp_setup_context; +struct setup_context; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -110,16 +110,8 @@ struct llvmpipe_context { /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The tiling engine */ - struct lp_setup_context *setup; + struct setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 1126bf90b9..a03eb874ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -91,7 +91,7 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, 
samplers, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, dummy, screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, screen->target, context_type, 2); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 643e85be20..207dfbfde1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -78,7 +78,7 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; + void *dummy; /* remove me */ float alpha_ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 6c51d40a8f..925e6f8b3b 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -136,9 +136,8 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct setup_context *setup_ctx = cvbr->setup; - llvmpipe_update_state( setup_ctx->llvmpipe ); + llvmpipe_update_derived( cvbr->llvmpipe ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; @@ -524,9 +523,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_vbuf_destroy(struct vbuf_render *vbr) { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - lp_setup_destroy_context(cvbr->setup); - FREE(cvbr); + FREE(vbr); } @@ -539,6 +536,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); assert(lp->draw); + assert(lp->setup); cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; @@ -555,8 +553,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) cvbr->base.destroy = lp_vbuf_destroy; cvbr->llvmpipe = lp; - - cvbr->setup = lp_setup_create_context(cvbr->llvmpipe); + cvbr->setup = lp->setup; return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c index e3d1cd56e0..498879e4cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,7 +40,7 @@ struct lp_rasterizer *lp_rast_create( void ) return CALLOC_STRUCT(lp_rasterizer); } -void lp_rast_bind_surfaces( struct lp_rasterizer *, +void lp_rast_bind_surfaces( struct lp_rasterizer *rast, struct pipe_surface *color, struct pipe_surface *zstencil, const float *clear_color, @@ -49,11 +49,9 @@ void lp_rast_bind_surfaces( struct lp_rasterizer *, { pipe_surface_reference(&rast->state.color, color); pipe_surface_reference(&rast->state.depth, depth); - rast->state.clear_color = util_pack_8888(clear_color); - rast->state.clear_depth = clear_depth * 0xffffffff; - rast->state.clear_stencil = clear_stencil; } + /* Begining of each tile: */ void lp_rast_start_tile( struct lp_rasterizer *, @@ -64,9 +62,10 @@ void lp_rast_start_tile( struct lp_rasterizer *, rast->y = y; } -void lp_rast_clear_color( struct lp_rasterizer *rast ) +void lp_rast_clear_color( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) { - const unsigned clear_color = rast->state.clear_color; + const unsigned clear_color = arg->clear.clear_color; unsigned i, j; for (i = 0; i < TILESIZE; i++) @@ -74,9 +73,10 @@ void lp_rast_clear_color( struct lp_rasterizer *rast ) rast->tile[i][j] = clear_color; } -void lp_rast_clear_depth( struct lp_rasterizer *rast ) +void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg) { - const unsigned clear_depth = rast->state.clear_depth; + const unsigned clear_color = arg->clear.clear_zstencil; unsigned i, j; for (i = 0; i < TILESIZE; i++) @@ -84,19 +84,15 @@ void lp_rast_clear_depth( struct lp_rasterizer *rast ) rast->tile[i][j] = clear_depth; } -void lp_rast_clear_stencil( struct lp_rasterizer *rast ) -{ - const unsigned clear_stencil = rast->state.clear_stencil; - - memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); -} 
-void lp_rast_load_color( struct lp_rasterizer *rast ) +void lp_rast_load_color( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg) { /* call u_tile func to load colors from surface */ } -void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +void lp_rast_load_zstencil( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) { /* call u_tile func to load depth (and stencil?) from surface */ } @@ -104,15 +100,15 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast ) /* Within a tile: */ void lp_rast_set_state( struct lp_rasterizer *rast, - const struct lp_rast_state *state ) + const union lp_rast_cmd_arg *arg ) { - rast->shader_state = state; - lp->quad.first->begin( lp->quad.first ); + rast->shader_state = arg->state; } void lp_rast_shade_tile( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) const struct lp_rast_shader_inputs *inputs ) { /* Set up the silly quad coef pointers @@ -193,14 +189,14 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* End of tile: */ -void lp_rast_store_color( struct lp_rasterizer *rast ) +void lp_rast_end_tile( struct lp_rasterizer *rast, + boolean write_depth ) { /* call u_tile func to store colors to surface */ -} -void lp_rast_store_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store depth/stencil to surface */ + if (write_depth) { + /* call u_tile func to store depth/stencil to surface */ + } } /* Shutdown: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6d741f7271..5151a174f2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,10 +50,10 @@ lp_setup_clear(struct setup_context *setup, unsigned flags); void -lp_setup_triangle(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]); +lp_setup_tri(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); void 
lp_setup_line(struct setup_context *setup, @@ -74,6 +74,11 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const enum lp_interp *interp, unsigned nr ); +boolean +lp_setup_is_texture_referenced( struct setup_context *setup, + const struct pipe_texture *texture ); + + void lp_setup_destroy( struct setup_context *setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 526ea100db..dfc9c0e6f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -37,34 +37,6 @@ struct lp_sampler_static_state; -/** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - extern void lp_get_samples(struct tgsi_sampler *tgsi_sampler, -- cgit v1.2.3 From 1caa26202c3bcc41ea5829b646128088e14d5dfd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 17:52:35 +0100 Subject: llvmpipe: start cleaning up --- src/gallium/drivers/llvmpipe/SConscript | 4 +-- src/gallium/drivers/llvmpipe/lp_rast.h | 12 ++++++-- src/gallium/drivers/llvmpipe/lp_setup.c | 41 ++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_setup.h | 1 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 26 ++++++++++------ 5 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index b39bc76da0..f6945535ca 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -47,8 +47,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_jit.c', 'lp_prim_vbuf.c', 
'lp_query.c', - 'lp_rast.c', - 'lp_rast_tri.c', 'lp_setup.c', 'lp_screen.c', 'lp_state_blend.c', @@ -61,6 +59,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 33a6065b89..f40208bbda 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -28,6 +28,8 @@ #ifndef LP_RAST_H #define LP_RAST_H +#include "lp_jit.h" + /* Initially create and program a single rasterizer directly. Later * will want multiple of these, one or two per core. At that stage * will probably pass command buffers into the rasterizers rather than @@ -35,6 +37,9 @@ */ struct lp_rasterizer; +#define TILESIZE 64 + + struct lp_rast_state { /* State for the shader: */ @@ -55,10 +60,11 @@ struct lp_rast_shader_inputs { */ const struct lp_rast_state *state; - /* Attribute interpolation: + /* Attribute interpolation: FIXME: reduce memory waste! */ - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; + float a0[PIPE_MAX_ATTRIBS][4]; + float dadx[PIPE_MAX_ATTRIBS][4]; + float dady[PIPE_MAX_ATTRIBS][4]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 514366b71f..43a4f5f029 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -32,7 +32,7 @@ * lp_setup_flush(). 
*/ -#include "lp_setup.h" +#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -56,31 +56,33 @@ void lp_setup_new_data_block( struct data_block_list *list ) static void reset_context( struct setup_context *setup ) { + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = scene->tile[i][j]; + struct cmd_block_list *list = &setup->tile[i][j]; struct cmd_block *block; struct cmd_block *tmp; - for (block = list->first; block != list->tail; block = tmp) { + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); } - list->first = list->tail; + list->head = list->tail; } } { - struct data_block_list *list = &scene->data; + struct data_block_list *list = &setup->data; struct data_block *block, *tmp; - for (block = list->first; block != list->tail; block = tmp) { + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); } - list->first = list->tail; + list->head = list->tail; } } @@ -90,39 +92,42 @@ static void reset_context( struct setup_context *setup ) /* Add a command to all active bins. 
*/ static void bin_everywhere( struct setup_context *setup, - bin_cmd cmd, + lp_rast_cmd cmd, const union lp_rast_cmd_arg *arg ) { unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_cmd( setup, &setup->tile[i][j], cmd, arg ); + bin_cmd( &setup->tile[i][j], cmd, arg ); } static void rasterize_bins( struct setup_context *setup, - struct lp_rast *rast, boolean write_depth ) { + struct lp_rasterizer *rast = setup->rast; + struct cmd_block *block; + unsigned i,j,k; + lp_rast_bind_color( rast, - scene->fb.color, + setup->fb.color, TRUE ); /* WRITE */ lp_rast_bind_depth( rast, - scene->fb.depth, + setup->fb.zstencil, write_depth ); /* WRITE */ - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); - for (block = scene->tile[i][j].first; block; block = block->next) { - for (k = 0; k < block->nr_cmds; k++) { - block->cmd[k].func( rast, block->cmd[k].arg ); + for (block = setup->tile[i][j].head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); } } @@ -130,7 +135,7 @@ rasterize_bins( struct setup_context *setup, } } - lp_setup_free_data( setup ); + reset_context( setup ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5151a174f2..6f560f5f93 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,6 +37,7 @@ enum lp_interp { LP_INTERP_FACING }; +struct pipe_texture; struct setup_context; struct setup_context * diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index eeee7159d9..19d163df8e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -28,23 +28,25 @@ #ifndef LP_SETUP_CONTEXT_H 
#define LP_SETUP_CONTEXT_H +#include "lp_setup.h" +#include "lp_rast.h" #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rast *, const union lp_rast_cmd_arg * ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); struct cmd_block { - union lp_rast_arg *arg[CMD_BLOCK_MAX]; lp_rast_cmd cmd[CMD_BLOCK_MAX]; + const union lp_rast_cmd_arg *arg[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; struct data_block { - ubyte data[DATA_BLOCK_SZ]; + ubyte data[DATA_BLOCK_SIZE]; unsigned used; struct data_block *next; }; @@ -68,10 +70,12 @@ struct data_block_list { struct setup_context { + struct lp_rasterizer *rast; + /* When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list bin[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct cmd_block_list tile[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; struct data_block_list data; unsigned tiles_x; @@ -110,9 +114,12 @@ struct setup_context { void (*triangle)( struct setup_context *, const float (*v0)[4], const float (*v1)[4], - const float (*v1)[4]); + const float (*v2)[4]); }; +void lp_setup_new_data_block( struct data_block_list *list ); +void lp_setup_new_cmd_block( struct cmd_block_list *list ); + static INLINE void *get_data( struct data_block_list *list, unsigned size) { @@ -123,7 +130,7 @@ static INLINE void *get_data( struct data_block_list *list, { struct data_block *tail = list->tail; - char *data = tail->data + tail->used; + ubyte *data = tail->data + tail->used; tail->used += size; return data; } @@ -132,11 +139,11 @@ static INLINE void *get_data( struct data_block_list *list, /* Add a command to a given bin. 
*/ static INLINE void bin_cmd( struct cmd_block_list *list, - bin_cmd cmd, + lp_rast_cmd cmd, const union lp_rast_cmd_arg *arg ) { - if (list->tail.count == CMD_BLOCK_MAX) { - lp_setup_new_cmd_block( list ) + if (list->tail->count == CMD_BLOCK_MAX) { + lp_setup_new_cmd_block( list ); } { @@ -150,3 +157,4 @@ static INLINE void bin_cmd( struct cmd_block_list *list, +#endif -- cgit v1.2.3 From 37b86aa55c6bb520997c00dbf1a2b38d4aed38eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:59:44 +0100 Subject: llvmpipe: Implement some of the rasterizer functions. --- src/gallium/drivers/llvmpipe/lp_rast.c | 88 +++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 498879e4cf..2217debc02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -37,7 +37,16 @@ struct lp_rasterizer *lp_rast_create( void ) { - return CALLOC_STRUCT(lp_rasterizer); + struct lp_rasterizer *rast; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + + return rast; } void lp_rast_bind_surfaces( struct lp_rasterizer *rast, @@ -54,7 +63,7 @@ void lp_rast_bind_surfaces( struct lp_rasterizer *rast, /* Begining of each tile: */ -void lp_rast_start_tile( struct lp_rasterizer *, +void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { @@ -68,9 +77,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, const unsigned clear_color = arg->clear.clear_color; unsigned i, j; - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_color; + if (clear_color[0] == clear_color[1] && + clear_color[1] == clear_color[2] && + clear_color[2] == clear_color[3]) { + memset(rast->tile.color, clear_color[0], 
TILE_SIZE * TILE_SIZE * 4); + } + else { + for (y = 0; y < TILE_SIZE; y++) + for (x = 0; x < TILE_SIZE; x++) + for (chan = 0; chan < 4; ++chan) + TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan]; + } } void lp_rast_clear_zstencil( struct lp_rasterizer *rast, @@ -79,9 +96,9 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const unsigned clear_color = arg->clear.clear_zstencil; unsigned i, j; - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_depth; + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + rast->tile.depth[i][j] = clear_depth; } @@ -108,9 +125,11 @@ void lp_rast_set_state( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { + unsigned i; + /* Set up the silly quad coef pointers */ for (i = 0; i < 4; i++) { @@ -120,8 +139,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILESIZE; i += 8) { - for (j = 0; j < TILESIZE; j += 2) { + for (i = 0; i < TILE_SIZE; i += 8) { + for (j = 0; j < TILE_SIZE; j += 2) { rast->shader_state.shade( inputs->jc, rast->x + i, rast->y + j, @@ -189,13 +208,54 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* End of tile: */ + + void lp_rast_end_tile( struct lp_rasterizer *rast, boolean write_depth ) { - /* call u_tile func to store colors to surface */ + struct pipe_surface *surface; + struct pipe_screen *screen; + struct pipe_transfer *transfer; + const unsigned x = rast->x; + const unsigned y = rast->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + surface = rast->state.color; + if(!surface) + return; + + screen = surface->texture->screen; + + if(x + w > surface->width) + w = surface->width - x; + if(y + h > surface->height) + h = surface->height - x; + + transfer = 
screen->get_tex_transfer(screen, + surface->texture, + surface->face, + surface->level, + surface->zslice, + PIPE_TRANSFER_READ_WRITE, + x, y, w, h); + if(!transfer) + return; + + map = screen->transfer_map(screen, transfer); + if(map) { + lp_tile_write_4ub(transfer->format, + rast->tile.color, + map, transfer->stride, + x, y, w, h); + + screen->transfer_unmap(screen, transfer); + } + + screen->tex_transfer_destroy(screen, transfer); if (write_depth) { - /* call u_tile func to store depth/stencil to surface */ + /* FIXME: call u_tile func to store depth/stencil to surface */ } } @@ -203,6 +263,8 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + align_free(rast->tile.depth); + align_free(rast->tile.color); FREE(rast); } -- cgit v1.2.3 From 35a90e67ebc37dc0a8432db76c91b8855a94598a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 17:59:57 +0100 Subject: llvmpipe: Assorted build fixes. --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 6 ++++-- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index c9439c7154..4015b0439a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -228,8 +228,10 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) } static void -update_culling() +update_culling(struct llvmpipe_context *lp) { + struct lp_setup_context *setup = lp->setup; + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { @@ -283,7 +285,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } -void llvmpipe_prepare( ) +void llvmpipe_prepare(struct lp_setup_context *setup) { struct llvmpipe_context *lp = setup->llvmpipe; 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 06560335d7..bb1396c3ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -82,7 +82,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_set_framebuffer( llvmpipe->setup, fb ); + lp_setup_set_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; } } -- cgit v1.2.3 From ab76b2a8b896edc1e972de108d044b70310b4324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:03:14 +0100 Subject: llvmpipe: Complete more rasterizer methods.. --- src/gallium/drivers/llvmpipe/lp_rast.c | 68 +++++++++++------------------ src/gallium/drivers/llvmpipe/lp_rast.h | 17 +++++--- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 14 ++++-- 3 files changed, 48 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2217debc02..50d2a0a0f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -50,14 +50,14 @@ struct lp_rasterizer *lp_rast_create( void ) } void lp_rast_bind_surfaces( struct lp_rasterizer *rast, - struct pipe_surface *color, - struct pipe_surface *zstencil, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, const float *clear_color, double clear_depth, unsigned clear_stencil) { - pipe_surface_reference(&rast->state.color, color); - pipe_surface_reference(&rast->state.depth, depth); + pipe_surface_reference(&rast->state.cbuf, cbuf); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); } @@ -93,12 +93,12 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg) { - const unsigned clear_color = arg->clear.clear_zstencil; + const unsigned clear_zstencil = arg->clear.clear_zstencil; unsigned i, j; for (i = 0; i < 
TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i][j] = clear_depth; + rast->tile.depth[i*TILE_SIZE + j] = clear_zstencil; } @@ -119,7 +119,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg ) { - rast->shader_state = arg->state; + rast->shader_state = arg->set_state; } @@ -128,36 +128,24 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { - unsigned i; - - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = &inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } + const uint32_t masks[4] = {~0, ~0, ~0, ~0}; + unsigned i, j; /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILE_SIZE; i += 8) { - for (j = 0; j < TILE_SIZE; j += 2) { - rast->shader_state.shade( inputs->jc, - rast->x + i, - rast->y + j, - rast->quads, 4 ); - } - } + for (i = 0; i < TILE_SIZE; i += 8) + for (j = 0; j < TILE_SIZE; j += 2) + lp_rast_shade_quads( rast, inputs, i, j, &masks); } -void lp_rast_shade_quads( const struct lp_rast_state *state, - struct lp_rast_tile *tile, - struct quad_header **quads, - unsigned nr ) +void lp_rast_shade_quads( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + const unsigned *masks) { - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; + const struct lp_rast_state *state = rast->shader_state; + struct lp_rast_tile *tile = &rast->tile; uint8_t *color; uint8_t *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; @@ -165,18 +153,13 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, unsigned q; /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - for (q = 
0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } /* mask */ for (q = 0; q < 4; ++q) for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; + mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; /* color buffer */ color = &TILE_PIXEL(tile->color, x, y, 0); @@ -184,7 +167,7 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = (uint8_t *)tile->depth + y*TILE_SIZE*4 + 2*x*4; + depth = tile->depth + y*TILE_SIZE + 2*x; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -196,9 +179,9 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* run shader */ state->shader( &state->jc, x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, + inputs->a0, + inputs->dadx, + inputs->dady, &mask[0][0], color, depth); @@ -220,8 +203,9 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, const unsigned y = rast->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; + void *map; - surface = rast->state.color; + surface = rast->state.cbuf; if(!surface) return; @@ -252,7 +236,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen->transfer_unmap(screen, transfer); } - screen->tex_transfer_destroy(screen, transfer); + screen->tex_transfer_destroy(transfer); if (write_depth) { /* FIXME: call u_tile func to store depth/stencil to surface */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index f40208bbda..380a1adbd2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -28,6 +28,7 @@ #ifndef LP_RAST_H #define LP_RAST_H +#include "pipe/p_compiler.h" #include "lp_jit.h" /* Initially create and program a single rasterizer directly. 
Later @@ -91,9 +92,6 @@ struct lp_rast_triangle { float dx12; float dx23; float dx31; - - /* State to run the shader: */ - struct lp_rast_shader_inputs inputs; }; struct clear_tile { @@ -112,8 +110,8 @@ struct load_tile { struct lp_rasterizer *lp_rast_create( void ); void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, const float *clear_color, double clear_depth, unsigned clear_stencil); @@ -154,7 +152,8 @@ void lp_rast_triangle( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); void lp_rast_shade_tile( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg *, + const struct lp_rast_shader_inputs *); void lp_rast_store_color( struct lp_rasterizer *, const union lp_rast_cmd_arg *); @@ -163,6 +162,12 @@ void lp_rast_store_zstencil( struct lp_rasterizer *, const union lp_rast_cmd_arg *); +/* End of tile: + */ + +void lp_rast_end_tile( struct lp_rasterizer *rast, + boolean write_depth ); + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b819519553..29e4c8fd80 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -38,7 +38,7 @@ struct lp_rast_tile { uint8_t *color; - uint8_t *depth; + uint32_t *depth; }; @@ -55,12 +55,20 @@ struct lp_rasterizer { struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; + struct pipe_surface *cbuf; + struct pipe_surface *zsbuf; unsigned clear_color; unsigned clear_depth; char clear_stencil; } state; + + const struct lp_rast_state *shader_state; }; + +void lp_rast_shade_quads( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + const unsigned *masks); + #endif -- cgit v1.2.3 From f92787679d668bd1f48929da49d4df55be635fa9 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:03:35 +0100 Subject: llvmpipe: More assorted build fixes. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_setup.c | 1 - src/gallium/drivers/llvmpipe/lp_state_derived.c | 30 ++++--------------------- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 40965d5f65..63e956fb20 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -203,8 +203,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, */ minx = MAX2(tri->maxx, rast->x); miny = MAX2(tri->miny, rast->y); - maxx = MIN2(tri->maxx, rast->x + TILESIZE); - maxy = MIN2(tri->maxy, rast->y + TILESIZE); + maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); + maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); if (miny == maxy || minx == maxx) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 43a4f5f029..9016c4b364 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -250,7 +250,6 @@ lp_setup_clear( struct setup_context *setup, { if (setup->state == SETUP_ACTIVE) { struct lp_rast_clear_info *clear_info; - unsigned i, j; clear_info = alloc_clear_info( setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 4015b0439a..b801f054a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -205,28 +205,7 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.vert_samplers[i].texture = 
llvmpipe->texture[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - +#if 0 static void update_culling(struct llvmpipe_context *lp) { @@ -243,6 +222,7 @@ update_culling(struct llvmpipe_context *lp) setup->winding = PIPE_WINDING_NONE; } } +#endif /* Hopefully this will remain quite simple, otherwise need to pull in @@ -259,10 +239,6 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->dirty |= LP_NEW_TEXTURE; } - if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_VS)) @@ -285,6 +261,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } +#if 0 void llvmpipe_prepare(struct lp_setup_context *setup) { struct llvmpipe_context *lp = setup->llvmpipe; @@ -294,3 +271,4 @@ void llvmpipe_prepare(struct lp_setup_context *setup) } } +#endif -- cgit v1.2.3 From 86dba3e4142276d76ecffc0cd238506df5efe9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:16:47 +0100 Subject: llvmpipe: Final adjustments to rasterizer methods. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 22 ++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 50d2a0a0f3..9d1861d246 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -74,8 +74,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg ) { - const unsigned clear_color = arg->clear.clear_color; - unsigned i, j; + const uint8_t *clear_color = arg->clear_color; if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -83,6 +82,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } else { + unsigned x, y, chan; for (y = 0; y < TILE_SIZE; y++) for (x = 0; x < TILE_SIZE; x++) for (chan = 0; chan < 4; ++chan) @@ -93,7 +93,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg) { - const unsigned clear_zstencil = arg->clear.clear_zstencil; + const unsigned clear_zstencil = arg->clear_zstencil; unsigned i, j; for (i = 0; i < TILE_SIZE; i++) @@ -128,14 +128,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { - const uint32_t masks[4] = {~0, ~0, ~0, ~0}; - unsigned i, j; + const unsigned masks[4] = {~0, ~0, ~0, ~0}; + unsigned x, y; /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILE_SIZE; i += 8) - for (j = 0; j < TILE_SIZE; j += 2) - lp_rast_shade_quads( rast, inputs, i, j, &masks); + for (y = 0; y < TILE_SIZE; y += 2) + for (x = 0; x < TILE_SIZE; x += 8) + lp_rast_shade_quads( rast, inputs, x, y, masks); } @@ -146,8 +146,8 @@ void lp_rast_shade_quads( struct 
lp_rasterizer *rast, { const struct lp_rast_state *state = rast->shader_state; struct lp_rast_tile *tile = &rast->tile; - uint8_t *color; - uint8_t *depth; + void *color; + void *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; @@ -247,6 +247,8 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + pipe_surface_reference(&rast->state.cbuf, NULL); + pipe_surface_reference(&rast->state.zsbuf, NULL); align_free(rast->tile.depth); align_free(rast->tile.color); FREE(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 380a1adbd2..0aa111b472 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -128,6 +128,8 @@ union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; const struct lp_rast_triangle *triangle; const struct lp_rast_state *set_state; + const uint8_t clear_color[4]; + unsigned clear_zstencil; }; -- cgit v1.2.3 From c7227f4b8d76d70b4f7ab8d384befd823c2be03e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 19:14:46 +0100 Subject: llvmpipe: rast_tri updates --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 63e956fb20..896ac253a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,18 +29,10 @@ * Rasterization for binned triangles within a tile */ -#include "lp_context.h" #include "lp_quad.h" #include "lp_quad_pipe.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" -#include "util/u_math.h" -#include "util/u_memory.h" +#include "lp_rast_priv.h" + #define 
BLOCKSIZE 4 @@ -48,7 +40,7 @@ /* Convert 8x8 block into four runs of quads and render each in turn. */ #if (BLOCKSIZE == 8) -static void block_full( struct triangle *tri, int x, int y ) +static void block_full( struct lp_rast_triangle *tri, int x, int y ) { struct quad_header *ptrs[4]; int i; @@ -79,7 +71,7 @@ static void block_full( struct triangle *tri, int x, int y ) } } #else -static void block_full( struct triangle *tri, int x, int y ) +static void block_full( struct lp_rast_triangle *tri, int x, int y ) { struct quad_header *ptrs[4]; int iy; @@ -108,7 +100,7 @@ do_quad( struct lp_rasterizer *rast, int x, int y, float c1, float c2, float c3 ) { - struct triangle *tri = rast->tri; + struct lp_rast_triangle *tri = rast->tri; struct quad_header *quad = &rast->quad[0]; float xstep1 = -tri->dy12; @@ -151,7 +143,7 @@ do_quad( struct lp_rasterizer *rast, * the quad: */ static void -do_block( struct triangle *tri, +do_block( struct lp_rast_triangle *tri, int x, int y, float c1, float c2, -- cgit v1.2.3 From 07ee87e6645318a34a395a50f4e8d554d118d24f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:18:35 +0100 Subject: llvmpipe: Fix typo. --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9d1861d246..170684c1b2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -214,7 +214,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, if(x + w > surface->width) w = surface->width - x; if(y + h > surface->height) - h = surface->height - x; + h = surface->height - y; transfer = screen->get_tex_transfer(screen, surface->texture, -- cgit v1.2.3 From 7ef36171d5d58b97ffa179f824d77a9c339a7ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:53:14 +0100 Subject: llvmpipe: Call lp_rast_shade_quads from tri rasterizer. 
--- src/gallium/drivers/llvmpipe/lp_rast.h | 3 + src/gallium/drivers/llvmpipe/lp_rast_tri.c | 276 +++++++++-------------------- 2 files changed, 90 insertions(+), 189 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 0aa111b472..41a7f5ebbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -92,6 +92,9 @@ struct lp_rast_triangle { float dx12; float dx23; float dx31; + + /* inputs for the shader */ + struct lp_rast_shader_inputs *inputs; }; struct clear_tile { diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 896ac253a0..138d6f55e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,80 +29,46 @@ * Rasterization for binned triangles within a tile */ +#include "util/u_math.h" #include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_rast_priv.h" +#include "lp_tile_soa.h" -#define BLOCKSIZE 4 +#define BLOCKSIZE 8 /* Convert 8x8 block into four runs of quads and render each in turn. 
*/ #if (BLOCKSIZE == 8) -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) { - struct quad_header *ptrs[4]; - int i; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - tri->quad[2].input.x0 = x + 4; - tri->quad[3].input.x0 = x + 6; - - for (i = 0; i < 4; i++, y += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - tri->quad[2].inout.mask = 0xf; - tri->quad[3].inout.mask = 0xf; - - tri->quad[0].input.y0 = y; - tri->quad[1].input.y0 = y; - tri->quad[2].input.y0 = y; - tri->quad[3].input.y0 = y; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - ptrs[2] = &tri->quad[2]; - ptrs[3] = &tri->quad[3]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); - } + const unsigned masks[4] = {~0, ~0, ~0, ~0}; + int iy; + + for (iy = 0; iy < 8; iy += 2) + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); } #else -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) { - struct quad_header *ptrs[4]; + const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! 
*/ int iy; - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - - for (iy = 0; iy < 4; iy += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - - tri->quad[0].input.y0 = y + iy; - tri->quad[1].input.y0 = y + iy; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); - } + for (iy = 0; iy < 4; iy += 2) + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); } #endif -static void -do_quad( struct lp_rasterizer *rast, +static INLINE unsigned +do_quad( const struct lp_rast_triangle *tri, int x, int y, float c1, float c2, float c3 ) { - struct lp_rast_triangle *tri = rast->tri; - struct quad_header *quad = &rast->quad[0]; - float xstep1 = -tri->dy12; float xstep2 = -tri->dy23; float xstep3 = -tri->dy31; @@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast, float ystep2 = tri->dx23; float ystep3 = tri->dx31; - quad->input.x0 = x; - quad->input.y0 = y; - quad->inout.mask = 0; + unsigned mask = 0; if (c1 > 0 && c2 > 0 && c3 > 0) - quad->inout.mask |= 1; + mask |= 1; if (c1 + xstep1 > 0 && c2 + xstep2 > 0 && c3 + xstep3 > 0) - quad->inout.mask |= 2; + mask |= 2; if (c1 + ystep1 > 0 && c2 + ystep2 > 0 && c3 + ystep3 > 0) - quad->inout.mask |= 4; + mask |= 4; if (c1 + ystep1 + xstep1 > 0 && c2 + ystep2 + xstep2 > 0 && c3 + ystep3 + xstep3 > 0) - quad->inout.mask |= 8; + mask |= 8; - if (quad->inout.mask) - rast->state->run( rast->state->state, &quad, 1 ); + return mask; } /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void -do_block( struct lp_rast_triangle *tri, - int x, int y, - float c1, - float c2, - float c3 ) +do_block( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) { const int step = 2; @@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri, float cx2 = c2; float cx3 = c3; + 
unsigned masks[4] = {0, 0, 0, 0}; + for (ix = 0; ix < BLOCKSIZE; ix += 2) { - do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); cx1 += xstep1; cx2 += xstep2; cx3 += xstep3; } + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + c1 += ystep1; c2 += ystep2; c3 += ystep3; } + } @@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri, * for this triangle: */ void lp_rast_triangle( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri ) + const union lp_rast_cmd_arg *arg ) { + const struct lp_rast_triangle *tri = arg->triangle; int minx, maxx, miny, maxy; /* Clamp to tile dimensions: @@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast, return; } - /* Bind parameter interpolants: - */ - for (i = 0; i < Elements(rast->quad); i++) { - rast->quad[i].coef = tri->coef; - rast->quad[i].posCoef = &tri->position_coef; - } - - /* Small area? - */ - if (miny + 16 > maxy && - minx + 16 > maxx) - { - const int step = 2; + const int step = BLOCKSIZE; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; - int x, y; + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + int x, y; - minx &= ~(step-1); - maxx &= ~(step-1); + minx &= ~(step-1); + miny &= ~(step-1); - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in dimension. 
- * - * Trivially accept or reject blocks, else jump to per-pixel - * examination above. - */ - for (y = miny; y < maxy; y += step) - { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else - { - do_quad(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } - } - else + for (y = miny; y < maxy; y += step) { - const int step = BLOCKSIZE; - - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; - - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; - int x, y; - - minx &= ~(step-1); - miny &= ~(step-1); + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; - for (y = miny; y < maxy; y += step) + for (x = minx; x < maxx; x += step) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full(&tri, x, y); /* trivial accept */ - } - else - { - do_block(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full(rast, tri, x, y); /* trivial accept */ + } + else + { + do_block(rast, tri, x, y, 
cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; } } -- cgit v1.2.3 From df8cedf9898cfc9c2ff8d5249ab1fe316f575a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:53:56 +0100 Subject: llvmpipe: Adjust interpolation coeffs declaration. --- src/gallium/drivers/llvmpipe/lp_rast.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 41a7f5ebbf..492e4b06ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -61,11 +61,15 @@ struct lp_rast_shader_inputs { */ const struct lp_rast_state *state; - /* Attribute interpolation: FIXME: reduce memory waste! + /* Attribute interpolation: + * + * First coefficient is position. + * + * FIXME: reduce memory waste! */ - float a0[PIPE_MAX_ATTRIBS][4]; - float dadx[PIPE_MAX_ATTRIBS][4]; - float dady[PIPE_MAX_ATTRIBS][4]; + float a0[1 + PIPE_MAX_SHADER_INPUTS][4]; + float dadx[1 + PIPE_MAX_SHADER_INPUTS][4]; + float dady[1 + PIPE_MAX_SHADER_INPUTS][4]; }; -- cgit v1.2.3 From 21489d2275ff556f6e44008d3f5493ca64619696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 8 Oct 2009 19:56:01 +0100 Subject: llvmpipe: Remove quad headers. 
--- src/gallium/drivers/llvmpipe/lp_quad.h | 114 ----------------------------- src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_rast_tri.c | 1 - src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 - 4 files changed, 117 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad.h diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h deleted file mode 100644 index 7eb05de77a..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef LP_QUAD_H -#define LP_QUAD_H - -#include "pipe/p_state.h" -#include "tgsi/tgsi_exec.h" - - -#define QUAD_PRIM_POINT 1 -#define QUAD_PRIM_LINE 2 -#define QUAD_PRIM_TRI 3 - - -/* The rasterizer generates 2x2 quads of fragment and feeds them to - * the current fp_machine (see below). - * Remember that Y=0=top with Y increasing down the window. - */ -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 - -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -/** - * Quad stage inputs (pos, coverage, front/back face, etc) - */ -struct quad_header_input -{ - int x0, y0; /**< quad window pos, always even */ - float coverage[QUAD_SIZE]; /**< fragment coverage for antialiasing */ - unsigned facing:1; /**< Front (0) or back (1) facing? */ - unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */ -}; - - -/** - * Quad stage inputs/outputs. - */ -struct quad_header_inout -{ - unsigned mask:4; -}; - - -/** - * Quad stage outputs (color & depth). - */ -struct quad_header_output -{ - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; -}; - - -/** - * Input interpolation coefficients - */ -struct quad_interp_coef -{ - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; -}; - - -/** - * Encodes everything we need to know about a 2x2 pixel block. Uses - * "Channel-Serial" or "SoA" layout. 
- */ -struct quad_header { - struct quad_header_input input; - struct quad_header_inout inout; - - /* Redundant/duplicated: - */ - const struct quad_interp_coef *coef; -}; - -#endif /* LP_QUAD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 170684c1b2..110caafffb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,7 +28,6 @@ #include "util/u_memory.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 138d6f55e0..86c785babb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -30,7 +30,6 @@ */ #include "util/u_math.h" -#include "lp_quad.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b00be0cc32..2c8b383123 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -84,7 +84,6 @@ #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_tex_sample.h" -- cgit v1.2.3 From 0718c7700533a965d7cd06b4f67b82bbae6e66a1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 19:58:28 +0100 Subject: llvmpipe: work on clears and coefficients --- src/gallium/drivers/llvmpipe/lp_rast.c | 15 +- src/gallium/drivers/llvmpipe/lp_rast.h | 27 +--- src/gallium/drivers/llvmpipe/lp_setup.c | 51 +++++-- src/gallium/drivers/llvmpipe/lp_setup.h | 15 ++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 186 ++++++++++++------------ 6 files changed, 166 insertions(+), 133 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 
110caafffb..695ddc089a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -48,14 +48,17 @@ struct lp_rasterizer *lp_rast_create( void ) return rast; } -void lp_rast_bind_surfaces( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil) +void lp_rast_bind_color( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + boolean write_color ) { pipe_surface_reference(&rast->state.cbuf, cbuf); +} + +void lp_rast_bind_zstencil( struct lp_rasterizer *rast, + struct pipe_surface *zsbuf, + boolean write_zstencil ) +{ pipe_surface_reference(&rast->state.zsbuf, zsbuf); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 492e4b06ad..28bb0a60eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -101,27 +101,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs *inputs; }; -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; - -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; struct lp_rasterizer *lp_rast_create( void ); -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil); +void lp_rast_bind_color( struct lp_rasterizer *, + struct pipe_surface *cbuf, + boolean write_when_done ); + +void lp_rast_bind_depth( struct lp_rasterizer *, + struct pipe_surface *zsbuf, + boolean write_when_done ); /* Begining of each tile: */ @@ -174,8 +164,7 @@ void lp_rast_store_zstencil( struct lp_rasterizer *, /* End of tile: */ -void lp_rast_end_tile( struct lp_rasterizer *rast, - boolean write_depth ); +void lp_rast_end_tile( struct lp_rasterizer *rast ); /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c index 9016c4b364..57ac85468d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -58,6 +58,8 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; + /* Free binner command lists: + */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { struct cmd_block_list *list = &setup->tile[i][j]; @@ -73,6 +75,8 @@ static void reset_context( struct setup_context *setup ) } } + /* Free binned data: + */ { struct data_block_list *list = &setup->data; struct data_block *block, *tmp; @@ -84,6 +88,10 @@ static void reset_context( struct setup_context *setup ) list->head = list->tail; } + + /* Reset some state: + */ + setup->clear.flags = 0; } @@ -131,7 +139,7 @@ rasterize_bins( struct setup_context *setup, } } - lp_rast_finish_tile( rast ); + lp_rast_end_tile( rast ); } } @@ -144,10 +152,10 @@ static void begin_binning( struct setup_context *setup ) { if (setup->fb.color) { - if (setup->fb.clear_color) + if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, - &setup->clear_data ); + &setup->clear.color ); else bin_everywhere( setup, lp_rast_load_color, @@ -155,10 +163,10 @@ begin_binning( struct setup_context *setup ) } if (setup->fb.zstencil) { - if (setup->fb.clear_zstencil) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, - &setup->clear_data ); + &setup->clear.zstencil ); else bin_everywhere( setup, lp_rast_load_zstencil, @@ -176,7 +184,7 @@ static void execute_clears( struct setup_context *setup ) { begin_binning( setup ); - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); } @@ -192,7 +200,7 @@ set_state( struct setup_context *setup, switch (new_state) { case SETUP_ACTIVE: if (old_state == SETUP_FLUSHED) - setup_begin_binning( setup ); + begin_binning( setup ); break; case SETUP_CLEARED: @@ -203,10 +211,10 @@ set_state( struct setup_context *setup, 
break; case SETUP_FLUSHED: - if (old_state == SETUP_CLEAR) + if (old_state == SETUP_CLEARED) execute_clears( setup ); else - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); break; } @@ -271,15 +279,20 @@ lp_setup_clear( struct setup_context *setup, } else { set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; if (flags & PIPE_CLEAR_COLOR) { - memcpy(setup->clear.color, color, sizeof setup->clear.color); + util_pack_color(rgba, + setup->fb.cbuf->format, + &setup->clear.color.clear_color ); } if (flags & PIPE_CLEAR_DEPTH_STENCIL) { - setup->clear.depth = clear_depth; - setup->clear.stencil = clear_stencil; + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); } } } @@ -293,6 +306,12 @@ lp_setup_set_fs_inputs( struct setup_context *setup, memcpy( setup->interp, interp, nr * sizeof interp[0] ); } +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ) +{ +} + static void first_triangle( struct setup_context *setup, @@ -324,10 +343,10 @@ lp_setup_line(struct setup_context *setup, } void -lp_setup_triangle(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) +lp_setup_tri(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { setup->triangle( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6f560f5f93..7c813070b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -65,6 +65,17 @@ void lp_setup_point( struct setup_context *setup, const float (*v0)[4] ); + +void +lp_setup_flush( struct setup_context *setup, + unsigned flags ); + + +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ); + void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cullmode, 
@@ -75,6 +86,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const enum lp_interp *interp, unsigned nr ); +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ); + boolean lp_setup_is_texture_referenced( struct setup_context *setup, const struct pipe_texture *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 19d163df8e..5722e3e9de 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -88,9 +88,8 @@ struct setup_context { struct { unsigned flags; - float clear_color[4]; - double clear_depth; - unsigned clear_stencil; + union lp_rast_cmd_arg color; + union lp_rast_cmd_arg zstencil; } clear; enum { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 75a0ea8888..efd91124a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,55 +38,60 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct tgsi_interp_coef *coef, +static void constant_coef( struct lp_rast_triangle *tri, const float (*v3)[4], unsigned vert_attr, unsigned i ) { - coef->a0[i] = v3[vert_attr][i]; - coef->dadx[i] = 0; - coef->dady[i] = 0; + tri->inputs.a0[i] = v3[vert_attr][i]; + tri->inputs.dadx[i] = 0; + tri->inputs.dady[i] = 0; } /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. 
*/ -static void linear_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void linear_coef( struct lp_rast_triangle *tri, + unsigned input, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], - unsigned vert_attr, - unsigned i) + unsigned vert_attr) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). 
+ * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up losing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[input+i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -98,30 +103,35 @@ static void linear_coef( struct triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void perspective_coef( struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, unsigned i) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + 
tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + tri->inputs.a0[input+i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -132,24 +142,26 @@ static void perspective_coef( struct triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) { + slot *= 4; + /*X*/ - tri->coef[slot].a0[0] = 0.0; - tri->coef[slot].dadx[0] = 1.0; - tri->coef[slot].dady[0] = 0.0; + tri->inputs.a0[slot+0] = 0.0; + tri->inputs.dadx[slot+0] = 1.0; + tri->inputs.dady[slot+0] = 0.0; /*Y*/ - tri->coef[slot].a0[1] = 0.0; - tri->coef[slot].dadx[1] = 0.0; - tri->coef[slot].dady[1] = 1.0; + tri->inputs.a0[slot+1] = 0.0; + tri->inputs.dadx[slot+1] = 0.0; + tri->inputs.dady[slot+1] = 1.0; /*Z*/ - tri->coef[slot].a0[2] = tri->position_coef.a0[2]; - tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; - tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + tri->inputs.a0[slot+2] = tri->inputs.a0[2]; + tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; + tri->inputs.dady[slot+2] = tri->inputs.dady[2]; /*W*/ - tri->coef[slot].a0[3] = tri->position_coef.a0[3]; - tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; - tri->coef[slot].dady[3] = tri->position_coef.dady[3]; + tri->inputs.a0[slot+3] = tri->inputs.a0[3]; + tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; + tri->inputs.dady[slot+3] = tri->inputs.dady[3]; } @@ -158,50 +170,46 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) * Compute the tri->coef[] array dadx, dady, a0 values. 
*/ static void setup_tri_coefficients( struct setup_context *setup, - struct triangle *tri, + struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); + setup_fragcoord_coef(tri, 0); + linear_coef(tri, input, v1, v2, v3, vert_attr, i); - /* setup interpolation for all the remaining attributes: + /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < vinfo->num_fs_inputs; input++) { - unsigned vert_attr = vinfo->attrib[input].src_index; + for (input = 0; input < setup->fs.nr_inputs; input++) { + unsigned vert_attr = setup->fs.input[input].src_index; unsigned i; - switch (vinfo->attrib[input].interp_mode) { - case INTERP_CONSTANT: - for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri->coef[input], v3, vert_attr, i); + switch (setup->fs.input[input].interp_mode) { + case LP_INTERP_CONSTANT: + constant_coef(tri, input, v3, vert_attr, i); break; - case INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_LINEAR: + linear_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_PERSPECTIVE: + perspective_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_POS: + case LP_INTERP_POS: setup_fragcoord_coef(tri, input); break; - case INTERP_FACING: - tri->coef[input].a0[0] = 1.0f - frontface; - tri->coef[input].dadx[0] = 0.0; - tri->coef[input].dady[0] = 0.0; + case LP_INTERP_FACING: + tri->inputs.a0[input*4+0] = 1.0f - frontface; + tri->inputs.dadx[input*4+0] = 0.0; + tri->da[input].dady[0] = 0.0; break; default: @@ -255,7 
+263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle( setup ); + struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i; -- cgit v1.2.3 From 1814395b7ef3506935ae1f12630cdd602e15cd55 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 22:46:21 +0100 Subject: llvmpipe: triangle struct owns its copy of shader inputs --- src/gallium/drivers/llvmpipe/lp_rast.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 28bb0a60eb..64d668f998 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -98,7 +98,7 @@ struct lp_rast_triangle { float dx31; /* inputs for the shader */ - struct lp_rast_shader_inputs *inputs; + struct lp_rast_shader_inputs inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 86c785babb..8cd3fcc360 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -48,7 +48,7 @@ static void block_full( struct lp_rasterizer *rast, int iy; for (iy = 0; iy < 8; iy += 2) - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); } #else static void block_full( struct lp_rasterizer *rast, @@ -59,7 +59,7 @@ static void block_full( struct lp_rasterizer *rast, int iy; for (iy = 0; iy < 4; iy += 2) - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); } #endif @@ -140,7 +140,7 @@ do_block( struct lp_rasterizer *rast, cx3 += xstep3; } - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, 
masks); c1 += ystep1; c2 += ystep2; -- cgit v1.2.3 From 253dfed93918bd87c4a55047a9d569ede545f8be Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 23:08:41 +0100 Subject: llvmpipe: get lp_setup compiling --- src/gallium/drivers/llvmpipe/lp_setup.c | 131 ++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup.h | 11 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 28 +++-- 3 files changed, 117 insertions(+), 53 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 57ac85468d..9f1b3d21f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -35,6 +35,10 @@ #include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "pipe/p_defines.h" + +static void set_state( struct setup_context *, unsigned ); void lp_setup_new_cmd_block( struct cmd_block_list *list ) { @@ -54,6 +58,37 @@ void lp_setup_new_data_block( struct data_block_list *list ) block->used = 0; } + +static void +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_triangle( setup ); + setup->triangle( setup, v0, v1, v2 ); +} + +static void +first_line( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_line( setup ); + setup->line( setup, v0, v1 ); +} + +static void +first_point( struct setup_context *setup, + const float (*v0)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_point( setup ); + setup->point( setup, v0 ); +} + static void reset_context( struct setup_context *setup ) { unsigned i, j; @@ -92,6 +127,13 @@ static void reset_context( struct setup_context *setup ) /* Reset some state: */ setup->clear.flags = 0; + + /* Have an explicit "start-binning" call and get rid of this + * pointer 
twiddling? + */ + setup->line = first_line; + setup->point = first_point; + setup->triangle = first_triangle; } @@ -119,11 +161,11 @@ rasterize_bins( struct setup_context *setup, unsigned i,j,k; lp_rast_bind_color( rast, - setup->fb.color, + setup->fb.cbuf, TRUE ); /* WRITE */ lp_rast_bind_depth( rast, - setup->fb.zstencil, + setup->fb.zsbuf, write_depth ); /* WRITE */ for (i = 0; i < setup->tiles_x; i++) { @@ -151,7 +193,7 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - if (setup->fb.color) { + if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, @@ -162,7 +204,7 @@ begin_binning( struct setup_context *setup ) NULL ); } - if (setup->fb.zstencil) { + if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, @@ -239,8 +281,8 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); - pipe_surface_reference( &setup->fb.color, color ); - pipe_surface_reference( &setup->fb.zstencil, zstencil ); + pipe_surface_reference( &setup->fb.cbuf, color ); + pipe_surface_reference( &setup->fb.zsbuf, zstencil ); width = MAX2( color->width, zstencil->width ); height = MAX2( color->height, zstencil->height ); @@ -251,44 +293,55 @@ lp_setup_bind_framebuffer( struct setup_context *setup, void lp_setup_clear( struct setup_context *setup, - const float *clear_color, - double clear_depth, - unsigned clear_stencil, + const float *color, + double depth, + unsigned stencil, unsigned flags ) { if (setup->state == SETUP_ACTIVE) { - struct lp_rast_clear_info *clear_info; + /* Add the clear to existing bins. In the unusual case where + * both color and depth-stencil are being cleared, we could + * discard the currently binned scene and start again, but I + * don't see that as being a common usage. 
+ */ + if (flags & PIPE_CLEAR_COLOR) { + union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); - clear_info = alloc_clear_info( setup ); + util_pack_color(color, + setup->fb.cbuf->format, + &arg->clear_color ); - if (flags & PIPE_CLEAR_COLOR) { - pack_color( setup, - clear_info->color, - clear_color ); - bin_everywhere(setup, lp_rast_clear_color, clear_info ); + bin_everywhere(setup, lp_rast_clear_color, arg ); } - if (flags & PIPE_CLEAR_DEPTH_STENCIL) { - pack_depth_stencil( setup, - clear_info->depth, - clear_depth, - clear_stencil ); + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); + + arg->clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); - bin_everywhere(setup, lp_rast_clear_zstencil, clear_info ); + bin_everywhere(setup, lp_rast_clear_zstencil, arg ); } } else { + /* Put ourselves into the 'pre-clear' state, specifically to try + * and accumulate multiple clears to color and depth_stencil + * buffers which the app or state-tracker might issue + * separately. 
+ */ set_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(rgba, + util_pack_color(color, setup->fb.cbuf->format, &setup->clear.color.clear_color ); } - if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = util_pack_z_stencil(setup->fb.zsbuf->format, depth, @@ -300,28 +353,21 @@ lp_setup_clear( struct setup_context *setup, void lp_setup_set_fs_inputs( struct setup_context *setup, - const enum lp_interp *interp, + const struct lp_shader_input *input, unsigned nr ) { - memcpy( setup->interp, interp, nr * sizeof interp[0] ); + memcpy( setup->fs.input, input, nr * sizeof input[0] ); + setup->fs.nr_inputs = nr; } void lp_setup_set_shader_state( struct setup_context *setup, - const struct jit_context *jc ) + const struct lp_jit_context *jc ) { + } -static void -first_triangle( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) -{ - set_state( setup, STATE_ACTIVE ); - setup_choose_triangle( setup, v0, v1, v2 ); -} @@ -352,7 +398,8 @@ lp_setup_tri(struct setup_context *setup, } -void setup_destroy_context( struct setup_context *setup ) +void +lp_setup_destroy( struct setup_context *setup ) { lp_rast_destroy( setup->rast ); FREE( setup ); @@ -363,18 +410,20 @@ void setup_destroy_context( struct setup_context *setup ) * Create a new primitive tiling engine. Currently also creates a * rasterizer to use with it. 
*/ -struct setup_context *setup_create_context( void ) +struct setup_context * +lp_setup_create( void ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); + unsigned i, j; - setup->rast = lp_rast_create( void ); + setup->rast = lp_rast_create(); if (!setup->rast) goto fail; for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].first = - setup->tile[i][j].next = CALLOC_STRUCT(cmd_block); + setup->tile[i][j].head = + setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); return setup; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 7c813070b9..04f9f87892 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,8 +37,15 @@ enum lp_interp { LP_INTERP_FACING }; +struct lp_shader_input { + enum lp_interp interp; + unsigned vs_output; +}; + struct pipe_texture; +struct pipe_surface; struct setup_context; +struct lp_jit_context; struct setup_context * lp_setup_create( void ); @@ -83,12 +90,12 @@ lp_setup_set_triangle_state( struct setup_context *setup, void lp_setup_set_fs_inputs( struct setup_context *setup, - const enum lp_interp *interp, + const struct lp_shader_input *interp, unsigned nr ); void lp_setup_set_shader_state( struct setup_context *setup, - const struct jit_context *jc ); + const struct lp_jit_context *jc ); boolean lp_setup_is_texture_referenced( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5722e3e9de..37caeed85f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -31,8 +31,17 @@ #include "lp_setup.h" #include "lp_rast.h" +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. 
+ */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILESIZE) +#define TILES_Y (MAXHEIGHT / TILESIZE) + #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + /* switch to a non-pointer value for this: */ @@ -62,12 +71,6 @@ struct data_block_list { }; -/* We're limited to 2K by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. - */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 - struct setup_context { struct lp_rasterizer *rast; @@ -75,15 +78,15 @@ struct setup_context { /* When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list tile[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct cmd_block_list tile[TILES_X][TILES_Y]; struct data_block_list data; unsigned tiles_x; unsigned tiles_y; struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; + struct pipe_surface *cbuf; + struct pipe_surface *zsbuf; } fb; struct { @@ -99,7 +102,7 @@ struct setup_context { } state; struct { - enum lp_interp inputs[PIPE_MAX_ATTRIBS]; + struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; } fs; @@ -116,6 +119,11 @@ struct setup_context { const float (*v2)[4]); }; +void lp_setup_choose_triangle( struct setup_context *setup ); +void lp_setup_choose_line( struct setup_context *setup ); +void lp_setup_choose_point( struct setup_context *setup ); + + void lp_setup_new_data_block( struct data_block_list *list ); void lp_setup_new_cmd_block( struct cmd_block_list *list ); -- cgit v1.2.3 From de902d3275d1861beb0cebdf0807a17e2682c8de Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:23:53 +0100 Subject: llvmpipe: more wip on coefficients --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 206 ++++++++++++++-------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
index efd91124a0..382a52e951 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,23 +29,23 @@ * Binning code for triangles */ -#include "lp_setup.h" -#include "lp_state.h" +#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#define NUM_CHANNELS 4 /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_rast_triangle *tri, - const float (*v3)[4], - unsigned vert_attr, - unsigned i ) + unsigned slot, + const float value, + unsigned i ) { - tri->inputs.a0[i] = v3[vert_attr][i]; - tri->inputs.dadx[i] = 0; - tri->inputs.dady[i] = 0; + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0; + tri->inputs.dady[slot][i] = 0; } /** @@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri, * for a triangle. */ static void linear_coef( struct lp_rast_triangle *tri, - unsigned input, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned vert_attr) + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). 
- * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[input+i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri, * divide the interpolated value by the interpolated W at that fragment. 
*/ static void perspective_coef( struct lp_rast_triangle *tri, + unsigned slot, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, - unsigned i) + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - tri->inputs.a0[input+i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
*/ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) { - slot *= 4; - /*X*/ - tri->inputs.a0[slot+0] = 0.0; - tri->inputs.dadx[slot+0] = 1.0; - tri->inputs.dady[slot+0] = 0.0; + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; /*Y*/ - tri->inputs.a0[slot+1] = 0.0; - tri->inputs.dadx[slot+1] = 0.0; - tri->inputs.dady[slot+1] = 1.0; + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; /*Z*/ - tri->inputs.a0[slot+2] = tri->inputs.a0[2]; - tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; - tri->inputs.dady[slot+2] = tri->inputs.dady[2]; + linear_coef(tri, slot, v1, v2, v3, 0, 2); /*W*/ - tri->inputs.a0[slot+3] = tri->inputs.a0[3]; - tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; - tri->inputs.dady[slot+3] = tri->inputs.dady[3]; + linear_coef(tri, slot, v1, v2, v3, 0, 3); } +static void setup_facing_coef( struct lp_rast_triangle *tri, + unsigned slot, + boolean frontface ) +{ + constant_coef( tri, slot, 1.0f - frontface, 0 ); + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ +} + /** * Compute the tri->coef[] array dadx, dady, a0 values. 
@@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface ) { - unsigned input; + unsigned slot; - /* z and w are done by linear interpolation: + /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, 0); - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + setup_fragcoord_coef(tri, 0, v1, v2, v3); /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < setup->fs.nr_inputs; input++) { - unsigned vert_attr = setup->fs.input[input].src_index; + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; unsigned i; - switch (setup->fs.input[input].interp_mode) { + switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: - constant_coef(tri, input, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: - perspective_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; - case LP_INTERP_POS: - setup_fragcoord_coef(tri, input); + case LP_INTERP_POSITION: + /* XXX: fix me - duplicates the values in slot zero. 
+ */ + setup_fragcoord_coef(tri, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - tri->inputs.a0[input*4+0] = 1.0f - frontface; - tri->inputs.dadx[input*4+0] = 0.0; - tri->da[input].dady[0] = 0.0; + setup_facing_coef(tri, slot+1, frontface); break; default: @@ -246,14 +246,14 @@ static inline float subpixel_snap( float a ) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) static void -do_triangle_ccw(struct lp_setup *setup, +do_triangle_ccw(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - const int rt_width = setup->framebuffer.cbufs[0]->width; - const int rt_height = setup->framebuffer.cbufs[0]->height; + const int rt_width = setup->fb.width; + const int rt_height = setup->fb.height; const float y1 = subpixel_snap(v1[0][1]); const float y2 = subpixel_snap(v2[0][1]); @@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); + struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i; -- cgit v1.2.3 From 84ab7dcf48e87350c0622c533e51aa495f7256c2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:24:19 +0100 Subject: llvmpipe: calculate overall width and height, pass to rasterizer --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 ++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 15 ++++++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 4 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 51 ++++++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_setup.h | 10 ++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 + 6 files changed, 84 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 695ddc089a..6ac44feb4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ 
-48,6 +48,17 @@ struct lp_rasterizer *lp_rast_create( void ) return rast; } + +void lp_rast_begin( struct lp_rasterizer *rast, + unsigned width, + unsigned height ) +{ + rast->width = width; + rast->height = height; + rast->check_for_clipped_tiles = (width % TILESIZE != 0 || + height % TILESIZE != 0); +} + void lp_rast_bind_color( struct lp_rasterizer *rast, struct pipe_surface *cbuf, boolean write_color ) @@ -195,8 +206,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ -void lp_rast_end_tile( struct lp_rasterizer *rast, - boolean write_depth ) +void lp_rast_end_tile( struct lp_rasterizer *rast ) { struct pipe_surface *surface; struct pipe_screen *screen; @@ -213,10 +223,10 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen = surface->texture->screen; - if(x + w > surface->width) - w = surface->width - x; - if(y + h > surface->height) - h = surface->height - y; + if(x + w > rast->width) + w = rast->width - x; + if(y + h > rast->height) + h = rast->height - y; transfer = screen->get_tex_transfer(screen, surface->texture, @@ -240,7 +250,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen->tex_transfer_destroy(transfer); - if (write_depth) { + if (0) { /* FIXME: call u_tile func to store depth/stencil to surface */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 64d668f998..26d057beb2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -97,6 +97,11 @@ struct lp_rast_triangle { float dx23; float dx31; + /* XXX: these are only used inside lp_setup_tri.c, don't really + * need to bin them: + */ + float oneoverarea; + /* inputs for the shader */ struct lp_rast_shader_inputs inputs; }; @@ -105,13 +110,17 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( void ); +void lp_rast_begin( struct lp_rasterizer *, + unsigned width, + unsigned height); + void lp_rast_bind_color( struct lp_rasterizer *, struct pipe_surface *cbuf, 
boolean write_when_done ); -void lp_rast_bind_depth( struct lp_rasterizer *, - struct pipe_surface *zsbuf, - boolean write_when_done ); +void lp_rast_bind_zstencil( struct lp_rasterizer *, + struct pipe_surface *zsbuf, + boolean write_when_done ); /* Begining of each tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 29e4c8fd80..d7a8b9c257 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -52,7 +52,11 @@ struct lp_rasterizer { unsigned x; unsigned y; + boolean clipped_tile; + boolean check_for_clipped_tiles; + unsigned width; + unsigned height; struct { struct pipe_surface *cbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9f1b3d21f0..4f10080816 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -160,13 +160,23 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; + if (setup->state != SETUP_ACTIVE) { + /* this can happen, not a big deal */ + debug_printf("%s called when not binning\n", __FUNCTION__); + return; + } + + lp_rast_begin( rast, + setup->fb.width, + setup->fb.height ); + lp_rast_bind_color( rast, setup->fb.cbuf, - TRUE ); /* WRITE */ + setup->fb.cbuf != NULL ); - lp_rast_bind_depth( rast, - setup->fb.zsbuf, - write_depth ); /* WRITE */ + lp_rast_bind_zstencil( rast, + setup->fb.zsbuf, + setup->fb.zsbuf != NULL && write_depth ); for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -193,15 +203,38 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + if (!setup->fb.cbuf && !setup->fb.zsbuf) { + setup->fb.width = 0; + setup->fb.height = 0; + } + else if (!setup->fb.zsbuf) { + setup->fb.width = setup->fb.cbuf->width; + setup->fb.height = setup->fb.cbuf->height; + } + else if (!setup->fb.cbuf) { + setup->fb.width 
= setup->fb.zsbuf->width; + setup->fb.height = setup->fb.zsbuf->height; + } + else { + /* XXX: not sure what we're really supposed to do for + * mis-matched color & depth buffer sizes. + */ + setup->fb.width = MIN2(setup->fb.cbuf->width, + setup->fb.zsbuf->width); + setup->fb.height = MIN2(setup->fb.cbuf->height, + setup->fb.zsbuf->height); + } + + setup->tiles_x = align(setup->fb.width, TILESIZE); + setup->tiles_y = align(setup->fb.height, TILESIZE); + if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, &setup->clear.color ); else - bin_everywhere( setup, - lp_rast_load_color, - NULL ); + bin_everywhere( setup, lp_rast_load_color, NULL ); } if (setup->fb.zsbuf) { @@ -210,9 +243,7 @@ begin_binning( struct setup_context *setup ) lp_rast_clear_zstencil, &setup->clear.zstencil ); else - bin_everywhere( setup, - lp_rast_load_zstencil, - NULL ); + bin_everywhere( setup, lp_rast_load_zstencil, NULL ); } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 04f9f87892..bd439fa857 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,9 +37,15 @@ enum lp_interp { LP_INTERP_FACING }; +/* Describes how to generate all the fragment shader inputs from the + * the vertices passed into our triangle/line/point functions. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. 
+ */ struct lp_shader_input { - enum lp_interp interp; - unsigned vs_output; + enum lp_interp interp; /* how to interpolate values */ + unsigned src_index; /* where to find values in incoming vertices */ }; struct pipe_texture; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 37caeed85f..7410ac70b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,8 @@ struct setup_context { struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; + unsigned width; + unsigned height; } fb; struct { -- cgit v1.2.3 From 47510040a68f5f672aee22eac6c01fb4dd60ec67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 10:37:24 +0100 Subject: llvmpipe: Follow write_color/write_zstencil. --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 ++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ------ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 ++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6ac44feb4c..9825099c94 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -64,6 +64,7 @@ void lp_rast_bind_color( struct lp_rasterizer *rast, boolean write_color ) { pipe_surface_reference(&rast->state.cbuf, cbuf); + rast->state.write_color = write_color; } void lp_rast_bind_zstencil( struct lp_rasterizer *rast, @@ -71,6 +72,7 @@ void lp_rast_bind_zstencil( struct lp_rasterizer *rast, boolean write_zstencil ) { pipe_surface_reference(&rast->state.zsbuf, zsbuf); + rast->state.write_zstencil = write_zstencil; } @@ -206,7 +208,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ -void lp_rast_end_tile( struct lp_rasterizer *rast ) +static void lp_rast_store_color( struct lp_rasterizer *rast ) { struct pipe_surface *surface; struct pipe_screen *screen; @@ 
-250,11 +252,25 @@ void lp_rast_end_tile( struct lp_rasterizer *rast ) screen->tex_transfer_destroy(transfer); - if (0) { - /* FIXME: call u_tile func to store depth/stencil to surface */ - } } + +static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* FIXME: call u_tile func to store depth/stencil to surface */ +} + + +void lp_rast_end_tile( struct lp_rasterizer *rast ) +{ + if (rast->state.write_color) + lp_rast_store_color(rast); + + if (rast->state.write_zstencil) + lp_rast_store_zstencil(rast); +} + + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 26d057beb2..aa50fba5a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -163,12 +163,6 @@ void lp_rast_shade_tile( struct lp_rasterizer *, const union lp_rast_cmd_arg *, const struct lp_rast_shader_inputs *); -void lp_rast_store_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); - -void lp_rast_store_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); - /* End of tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index d7a8b9c257..f5a6699ed4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -61,6 +61,8 @@ struct lp_rasterizer { struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; + boolean write_color; + boolean write_zstencil; unsigned clear_color; unsigned clear_depth; char clear_stencil; -- cgit v1.2.3 From 415b271b5100d64579690111bc8eb549866865a7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:44:07 +0100 Subject: llvmpipe: hook up some state, add stub line and point functions --- src/gallium/drivers/llvmpipe/SConscript | 3 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 23 +++++++++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 +++-- 
src/gallium/drivers/llvmpipe/lp_setup_line.c | 47 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_point.c | 46 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 +++++------ 6 files changed, 133 insertions(+), 15 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_line.c create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_point.c diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f6945535ca..3530e739cc 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -48,6 +48,9 @@ llvmpipe = env.ConvenienceLibrary( 'lp_prim_vbuf.c', 'lp_query.c', 'lp_setup.c', + 'lp_setup_tri.c', + 'lp_setup_line.c', + 'lp_setup_point.c', 'lp_screen.c', 'lp_state_blend.c', 'lp_state_clip.c', diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4f10080816..13b40f1494 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -148,7 +148,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_cmd( &setup->tile[i][j], cmd, arg ); + bin_command( &setup->tile[i][j], cmd, arg ); } @@ -382,6 +382,19 @@ lp_setup_clear( struct setup_context *setup, } + +void +lp_setup_set_tri_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) +{ + setup->ccw_is_frontface = ccw_is_frontface; + setup->cullmode = cull_mode; + setup->triangle = first_triangle; +} + + + void lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, @@ -432,6 +445,14 @@ lp_setup_tri(struct setup_context *setup, void lp_setup_destroy( struct setup_context *setup ) { + unsigned i, j; + + reset_context( setup ); + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) + FREE(setup->tile[i][j].head); + lp_rast_destroy( 
setup->rast ); FREE( setup ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 7410ac70b8..9411f14cfb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -83,6 +83,9 @@ struct setup_context { unsigned tiles_x; unsigned tiles_y; + + boolean ccw_is_frontface; + unsigned cullmode; struct { struct pipe_surface *cbuf; @@ -147,9 +150,9 @@ static INLINE void *get_data( struct data_block_list *list, /* Add a command to a given bin. */ -static INLINE void bin_cmd( struct cmd_block_list *list, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) +static INLINE void bin_command( struct cmd_block_list *list, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg *arg ) { if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c new file mode 100644 index 0000000000..feea79d394 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for lines + */ + +#include "lp_setup_context.h" + +static void line_nop( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4] ) +{ +} + + +void +lp_setup_choose_line( struct setup_context *setup ) +{ + setup->line = line_nop; +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c new file mode 100644 index 0000000000..f03ca729b2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for points + */ + +#include "lp_setup_context.h" + +static void point_nop( struct setup_context *setup, + const float (*v0)[4] ) +{ +} + + +void +lp_setup_choose_point( struct setup_context *setup ) +{ + setup->point = point_nop; +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 382a52e951..d3b8ce9434 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -30,6 +30,7 @@ */ #include "lp_setup_context.h" +#include "lp_rast.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -263,10 +264,9 @@ do_triangle_ccw(struct setup_context *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); + struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; float c1, c2, c3; - int i; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -363,7 +363,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command(setup->tile[minx][miny], lp_rast_triangle, tri ); + bin_command( &setup->tile[minx][miny], lp_rast_triangle, tri ); } else { @@ -412,12 +412,12 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command(setup->tile[x][y], lp_rast_shade_tile, 
&tri->inputs ); + bin_command( &setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { /* shade partial tile */ - bin_command(setup->tile[x][y], lp_rast_triangle, tri ); + bin_command( &setup->tile[x][y], lp_rast_triangle, tri ); } /* Iterate cx values across the region: @@ -477,13 +477,11 @@ static void triangle_nop( struct setup_context *setup, { } -void setup_set_tri_state( struct setup_context *setup, - unsigned cull_mode, - boolean ccw_is_frontface) -{ - setup->ccw_is_frontface = ccw_is_frontface; - switch (cull_mode) { +void +lp_setup_choose_triangle( struct setup_context *setup ) +{ + switch (setup->cull_mode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From 4cdd10cb4b60d85f6c231a26739f7d5e264a05e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 11:29:01 +0100 Subject: llvmpipe: use union lp_cmd_rast_arg directly, rather than through a pointer The union itself consists of pointers. We don't need to be passing pointer to pointers. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 21 ++++----- src/gallium/drivers/llvmpipe/lp_rast.h | 54 +++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 +- src/gallium/drivers/llvmpipe/lp_setup.c | 63 ++++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 17 +++++-- 6 files changed, 98 insertions(+), 67 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9825099c94..de15ddbb2e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -87,9 +87,9 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, } void lp_rast_clear_color( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - const uint8_t *clear_color = arg->clear_color; + const uint8_t *clear_color = arg.clear_color; if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -106,25 +106,24 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, } void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg) + const union lp_rast_cmd_arg arg) { - const unsigned clear_zstencil = arg->clear_zstencil; unsigned i, j; for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i*TILE_SIZE + j] = clear_zstencil; + rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; } void lp_rast_load_color( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg) + const union lp_rast_cmd_arg arg) { /* call u_tile func to load colors from surface */ } void lp_rast_load_zstencil( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { /* call u_tile func to load depth (and stencil?) 
from surface */ } @@ -132,17 +131,17 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* Within a tile: */ void lp_rast_set_state( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - rast->shader_state = arg->set_state; + rast->shader_state = arg.set_state; } void lp_rast_shade_tile( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg, - const struct lp_rast_shader_inputs *inputs ) + const union lp_rast_cmd_arg arg ) { + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index aa50fba5a6..44cb4032da 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -134,34 +134,70 @@ union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; const struct lp_rast_triangle *triangle; const struct lp_rast_state *set_state; - const uint8_t clear_color[4]; + uint8_t clear_color[4]; unsigned clear_zstencil; }; +/* Cast wrappers. Hopefully these compile to noops! 
+ */ +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) +{ + union lp_rast_cmd_arg arg; + arg.shade_tile = shade_tile; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle ) +{ + union lp_rast_cmd_arg arg; + arg.triangle = triangle; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_state( const struct lp_rast_state *state ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = state; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_null( void ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = NULL; + return arg; +} + + + + /* Binnable Commands: */ void lp_rast_clear_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_clear_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_load_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_load_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_set_state( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg ); void lp_rast_triangle( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer *, - const union lp_rast_cmd_arg *, - const struct lp_rast_shader_inputs *); + const union lp_rast_cmd_arg ); /* End of tile: diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 8cd3fcc360..efc635bffe 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -155,9 +155,9 @@ do_block( struct lp_rasterizer *rast, * for this triangle: */ void lp_rast_triangle( struct lp_rasterizer *rast, - const union 
lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - const struct lp_rast_triangle *tri = arg->triangle; + const struct lp_rast_triangle *tri = arg.triangle; int minx, maxx, miny, maxy; /* Clamp to tile dimensions: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 13b40f1494..c0c294fbe3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -143,7 +143,7 @@ static void reset_context( struct setup_context *setup ) */ static void bin_everywhere( struct setup_context *setup, lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { unsigned i, j; for (i = 0; i < setup->tiles_x; i++) @@ -232,18 +232,18 @@ begin_binning( struct setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, - &setup->clear.color ); + setup->clear.color ); else - bin_everywhere( setup, lp_rast_load_color, NULL ); + bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); } if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, - &setup->clear.zstencil ); + setup->clear.zstencil ); else - bin_everywhere( setup, lp_rast_load_zstencil, NULL ); + bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } } @@ -329,32 +329,34 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + if (flags & PIPE_CLEAR_COLOR) { + util_pack_color(color, + setup->fb.cbuf->format, + &setup->clear.color.clear_color ); + } + + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); + } + if (setup->state == SETUP_ACTIVE) { /* Add the clear to existing bins. 
In the unusual case where * both color and depth-stencilare being cleared, we could * discard the currently binned scene and start again, but I * don't see that as being a common usage. */ - if (flags & PIPE_CLEAR_COLOR) { - union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); - - util_pack_color(color, - setup->fb.cbuf->format, - &arg->clear_color ); - - bin_everywhere(setup, lp_rast_clear_color, arg ); - } - - if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); + if (flags & PIPE_CLEAR_COLOR) + bin_everywhere( setup, + lp_rast_clear_color, + setup->clear.color ); - arg->clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); - - bin_everywhere(setup, lp_rast_clear_zstencil, arg ); - } + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + bin_everywhere( setup, + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -365,19 +367,6 @@ lp_setup_clear( struct setup_context *setup, set_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; - - if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(color, - setup->fb.cbuf->format, - &setup->clear.color.clear_color ); - } - - if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); - } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 9411f14cfb..b29fec8ef0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -45,11 +45,11 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); struct cmd_block { lp_rast_cmd cmd[CMD_BLOCK_MAX]; - const union lp_rast_cmd_arg 
*arg[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; @@ -152,7 +152,7 @@ static INLINE void *get_data( struct data_block_list *list, */ static INLINE void bin_command( struct cmd_block_list *list, lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) + union lp_rast_cmd_arg arg ) { if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d3b8ce9434..f927f9df91 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -230,7 +230,10 @@ static inline float subpixel_snap( float a ) } - +static INLINE void bin_triangle( struct cmd_block_list *list, + const struct lp_rast_triangle arg ) +{ +} /* to avoid having to allocate power-of-four, square render targets, @@ -363,7 +366,8 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command( &setup->tile[minx][miny], lp_rast_triangle, tri ); + bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } else { @@ -412,12 +416,15 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command( &setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); + bin_command( &setup->tile[x][y], lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { /* shade partial tile */ - bin_command( &setup->tile[x][y], lp_rast_triangle, tri ); + bin_command( &setup->tile[x][y], + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: @@ -481,7 +488,7 @@ static void triangle_nop( struct setup_context *setup, void lp_setup_choose_triangle( struct setup_context *setup ) { - switch (setup->cull_mode) { + switch (setup->cullmode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From e215f94f15fd20919cc0ed500dc2efde4f076516 Mon Sep 
17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 12:19:49 +0100 Subject: llvmpipe: get lp_setup_tri building --- src/gallium/drivers/llvmpipe/lp_rast.h | 14 +++++++++-- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 37 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_setup.c | 6 ++--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12 +++++----- 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 44cb4032da..72f897503d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -77,6 +77,11 @@ struct lp_rast_shader_inputs { * plus inputs to run the shader: */ struct lp_rast_triangle { + int minx; + int maxx; + int miny; + int maxy; + /* one-pixel sized trivial accept offsets for each plane */ float ei1; float ei2; @@ -97,8 +102,13 @@ struct lp_rast_triangle { float dx23; float dx31; - /* XXX: these are only used inside lp_setup_tri.c, don't really - * need to bin them: + /* edge function values at minx,miny ?? 
*/ + float c1; + float c2; + float c3; + + /* XXX: this is only used inside lp_setup_tri.c, don't really + * need it here: */ float oneoverarea; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index efc635bffe..7110afb9d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -158,21 +158,6 @@ void lp_rast_triangle( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_triangle *tri = arg.triangle; - int minx, maxx, miny, maxy; - - /* Clamp to tile dimensions: - */ - minx = MAX2(tri->maxx, rast->x); - miny = MAX2(tri->miny, rast->y); - maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); - maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); - - if (miny == maxy || - minx == maxx) { - debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); - //assert(0); - return; - } const int step = BLOCKSIZE; @@ -191,11 +176,33 @@ void lp_rast_triangle( struct lp_rasterizer *rast, float ystep1 = step * tri->dx12; float ystep2 = step * tri->dx23; float ystep3 = step * tri->dx31; + + /* Clamp to tile dimensions: + */ + int minx = MAX2(tri->maxx, rast->x); + int miny = MAX2(tri->miny, rast->y); + int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); + int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); + int x, y; + float x0, y0; + float c1, c2, c3; + + if (miny == maxy || minx == maxx) { + debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); + return; + } minx &= ~(step-1); miny &= ~(step-1); + x0 = (float)minx; + y0 = (float)miny; + + c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; + c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; + c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; + for (y = miny; y < maxy; y += step) { float cx1 = c1; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c0c294fbe3..56bbee1f7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ 
b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -373,9 +373,9 @@ lp_setup_clear( struct setup_context *setup, void -lp_setup_set_tri_state( struct setup_context *setup, - unsigned cull_mode, - boolean ccw_is_frontface) +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) { setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index f927f9df91..5c402259df 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -315,9 +315,9 @@ do_triangle_ccw(struct setup_context *setup, /* half-edge constants, will be interated over the whole * rendertarget. */ - c1 = tri->dy12 * x1 - tri->dx12 * y1; - c2 = tri->dy23 * x2 - tri->dx23 * y2; - c3 = tri->dy31 * x3 - tri->dx31 * y3; + tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; + tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; + tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; /* correct for top-left fill convention: */ @@ -351,9 +351,9 @@ do_triangle_ccw(struct setup_context *setup, minx &= ~(TILESIZE-1); /* aligned blocks */ miny &= ~(TILESIZE-1); /* aligned blocks */ - c1 += tri->dx12 * miny - tri->dy12 * minx; - c2 += tri->dx23 * miny - tri->dy23 * minx; - c3 += tri->dx31 * miny - tri->dy31 * minx; + c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; + c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; + c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; /* Convert to tile coordinates: */ -- cgit v1.2.3 From c1013f5d404880046f304de706d4216b08bd3011 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 12:26:49 +0100 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_flush.c | 1 + src/gallium/drivers/llvmpipe/lp_jit.c | 18 ------------------ src/gallium/drivers/llvmpipe/lp_jit.h | 4 ---- src/gallium/drivers/llvmpipe/lp_state_surface.c | 6 +++++- 4 files changed, 6 insertions(+), 23 
deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index d0dd41f09c..f7a1d89701 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -37,6 +37,7 @@ #include "lp_surface.h" #include "lp_state.h" #include "lp_winsys.h" +#include "lp_setup.h" void diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index a03eb874ac..fb6ec9bb37 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -108,24 +108,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 207dfbfde1..7eccb5da85 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -118,10 +118,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); void diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index bb1396c3ab..909ca9f117 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -31,6 +31,7 @@ #include "lp_context.h" #include "lp_state.h" #include 
"lp_surface.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -82,7 +83,10 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_set_framebuffer( lp->setup, fb ); + lp_setup_bind_framebuffer( lp->setup, + fb->cbufs[0], + fb->zsbuf ); + lp->dirty |= LP_NEW_FRAMEBUFFER; } } -- cgit v1.2.3 From c4d54b62f5491dbec9930563209639f8fb7dcf2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 11:29:33 +0100 Subject: llvmpipe: Eliminate constant mapping/unmapping. --- src/gallium/drivers/llvmpipe/lp_context.h | 3 -- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 54 --------------------------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 ++++++++-- 3 files changed, 17 insertions(+), 60 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 852f7a1d05..e34385bbae 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -86,9 +86,6 @@ struct llvmpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; - /** Vertex format */ struct vertex_info vertex_info; struct vertex_info vertex_info_vbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2bffcdb3ba..b879b5e755 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,54 +45,6 @@ -static void -llvmpipe_map_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i, size; - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - } - - if (lp->constants[PIPE_SHADER_VERTEX].buffer) - size = 
lp->constants[PIPE_SHADER_VERTEX].buffer->size; - else - size = 0; - - lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT]; - - draw_set_mapped_constant_buffer(lp->draw, - lp->mapped_constants[PIPE_SHADER_VERTEX], - size); -} - - -static void -llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i; - - /* really need to flush all prims since the vert/frag shaders const buffers - * are going away now. - */ - draw_flush(lp->draw); - - draw_set_mapped_constant_buffer(lp->draw, NULL, 0); - - lp->jit_context.constants = NULL; - - for (i = 0; i < 2; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - screen->buffer_unmap(screen, lp->constants[i].buffer); - lp->mapped_constants[i] = NULL; - } -} - - boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -123,8 +75,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, if (lp->dirty) llvmpipe_update_derived( lp ); - llvmpipe_map_constant_buffers(lp); - /* * Map vertex buffers */ @@ -160,10 +110,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - - /* Note: leave drawing surfaces mapped */ - llvmpipe_unmap_constant_buffers(lp); - return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2c8b383123..59c7afc6f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -83,6 +83,7 @@ #include "lp_bld_debug.h" #include "lp_screen.h" #include "lp_context.h" +#include "lp_buffer.h" #include "lp_state.h" #include "lp_tex_sample.h" @@ -670,16 +671,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_constant_buffer *constants) { struct 
llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct pipe_buffer *buffer = constants ? constants->buffer : NULL; + unsigned size = buffer ? buffer->size : 0; + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(shader == PIPE_SHADER_VERTEX) + draw_flush(llvmpipe->draw); + /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + + if(shader == PIPE_SHADER_FRAGMENT) { + llvmpipe->jit_context.constants = data; + } + + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + } llvmpipe->dirty |= LP_NEW_CONSTANTS; } -- cgit v1.2.3 From 00ffef383c62ca6cd0d5687539dc45fecfbefeec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 13:22:42 +0100 Subject: util: Force ESI register for cpuid's ebx result. Fixes a segfault and better code. Unfortunately using an arbitrary register ("=r") causes the gcc to abort when the code is optimized saying it can't satisfy the constraint. Setting seems to do the trick. --- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 70ce25cfcf..ded361ce70 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -346,7 +346,7 @@ cpuid(uint32_t ax, uint32_t *p) "cpuid\n\t" "xchgl %%ebx, %1" : "=a" (p[0]), - "=m" (p[1]), + "=S" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax) -- cgit v1.2.3 From d904ed88c1d957f662497343de7dc3e9fa743e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 13:41:33 +0100 Subject: llvmpipe: Pass state to setup. 
--- src/gallium/drivers/llvmpipe/lp_context.h | 3 +- src/gallium/drivers/llvmpipe/lp_setup.c | 99 +++++++++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_setup.h | 23 +++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 ++ src/gallium/drivers/llvmpipe/lp_state.h | 1 + src/gallium/drivers/llvmpipe/lp_state_blend.c | 18 ++--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 18 +++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 10 --- 9 files changed, 150 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index e34385bbae..17c6939ff5 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -59,7 +59,7 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; /** Other rendering state */ - struct pipe_blend_color blend_color[4][16]; + struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; @@ -120,7 +120,6 @@ struct llvmpipe_context { unsigned tex_timestamp; boolean no_rast; - struct lp_jit_context jit_context; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 56bbee1f7c..f999004a66 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -32,11 +32,15 @@ * lp_setup_flush(). 
*/ -#include "lp_setup_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -#include "pipe/p_defines.h" +#include "lp_state.h" +#include "lp_buffer.h" +#include "lp_texture.h" +#include "lp_setup_context.h" static void set_state( struct setup_context *, unsigned ); @@ -394,14 +398,99 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ) +lp_setup_set_fs( struct setup_context *setup, + struct lp_fragment_shader *fs ) { - + /* FIXME: reference count */ + + setup->fs.jit_function = fs->current->jit_function; } +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer) +{ + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + struct pipe_buffer *dummy; + /* FIXME: hold on to the reference */ + dummy = NULL; + pipe_buffer_reference(&dummy, buffer); + setup->fs.jit_context.constants = data; + + setup->fs.jit_context_dirty = TRUE; +} + + +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ) +{ + if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.jit_context.alpha_ref_value = alpha_ref_value; + setup->fs.jit_context_dirty = TRUE; + } +} + +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ) +{ + unsigned i, j; + + if(!setup->fs.jit_context.blend_color) + setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); + + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(blend_color->color[i]); + for (j = 0; j < 16; ++j) + setup->fs.jit_context.blend_color[i*4 + j] = c; + } + + setup->fs.jit_context_dirty = TRUE; +} + +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture) +{ + struct pipe_texture *dummy; + unsigned i; + + assert(num <= 
PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + /* FIXME: hold on to the reference */ + dummy = NULL; + pipe_texture_reference(&dummy, tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &setup->fs.jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + else + /* FIXME: map the rendertarget */ + assert(0); + } + } + + setup->fs.jit_context_dirty = TRUE; +} + +static void +lp_setup_set_shader_state( struct setup_context *setup, + const struct lp_jit_context *jc ) +{ + + +} /* Stubs for lines & points for now: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bd439fa857..ac9c3cc0ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,7 +50,9 @@ struct lp_shader_input { struct pipe_texture; struct pipe_surface; +struct pipe_blend_color; struct setup_context; +struct lp_fragment_shader; struct lp_jit_context; struct setup_context * @@ -100,8 +102,25 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ); +lp_setup_set_fs( struct setup_context *setup, + struct lp_fragment_shader *fs ); + +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer); + + +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ); + +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ); + +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture); boolean lp_setup_is_texture_referenced( struct setup_context *setup, diff --git 
a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index b29fec8ef0..2e2380dd80 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -109,6 +109,11 @@ struct setup_context { struct { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; + + struct lp_jit_context jit_context; + lp_jit_frag_func jit_function; + + boolean jit_context_dirty; } fs; void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index a9980d6f14..64fe3600f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -54,6 +54,7 @@ #define LP_NEW_VERTEX 0x1000 #define LP_NEW_VS 0x2000 #define LP_NEW_QUERY 0x4000 +#define LP_NEW_BLEND_COLOR 0x8000 struct tgsi_sampler; diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 3f03bd0057..48afe5f524 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -67,17 +67,16 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color *blend_color ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned i, j; + + if(!blend_color) + return; + + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); - if(!llvmpipe->jit_context.blend_color) - llvmpipe->jit_context.blend_color = align_malloc(4 * 16, 16); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*4 + j] = c; - } + llvmpipe->dirty |= LP_NEW_BLEND_COLOR; } @@ -101,9 +100,6 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state 
*)depth_stencil; - if(llvmpipe->depth_stencil) - llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; - llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index b801f054a2..00903c8ef4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -33,6 +33,7 @@ #include "draw/draw_private.h" #include "lp_context.h" #include "lp_screen.h" +#include "lp_setup.h" #include "lp_state.h" @@ -256,6 +257,23 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & (LP_NEW_BLEND | + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) + llvmpipe_update_fs( llvmpipe ); + + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) + lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) + lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); + + if (llvmpipe->dirty & LP_NEW_CONSTANTS) + lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); + + if (llvmpipe->dirty & LP_NEW_TEXTURE) + lp_setup_set_sampler_textures(llvmpipe->setup, llvmpipe->num_textures, llvmpipe->texture); llvmpipe->dirty = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 59c7afc6f7..63e675e584 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -681,16 +681,15 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(llvmpipe->constants[shader].buffer == buffer) + return; + if(shader == PIPE_SHADER_VERTEX) draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, 
buffer); - if(shader == PIPE_SHADER_FRAGMENT) { - llvmpipe->jit_context.constants = data; - } - if(shader == PIPE_SHADER_VERTEX) { draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index ae787801eb..e19394a4c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -96,16 +96,6 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference(&llvmpipe->texture[i], tex); - - if(tex) { - struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); - struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; - jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) - jit_tex->data = lp_tex->data; - } } llvmpipe->num_textures = num; -- cgit v1.2.3 From c0e3e35b03e6cbed3768cb56e298b6119eafe1ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 13:44:24 +0100 Subject: llvmpipe: Add stub lp_setup_is_texture_referenced. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f999004a66..0a12d93c38 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -484,6 +484,15 @@ lp_setup_set_sampler_textures( struct setup_context *setup, setup->fs.jit_context_dirty = TRUE; } +boolean +lp_setup_is_texture_referenced( struct setup_context *setup, + const struct pipe_texture *texture ) +{ + /* FIXME */ + return PIPE_UNREFERENCED; +} + + static void lp_setup_set_shader_state( struct setup_context *setup, const struct lp_jit_context *jc ) -- cgit v1.2.3 From 1928c965b1fb76987cbc834111bd1d1e1f2cda51 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 13:53:14 +0100 Subject: llvmpipe: fix a couple of warnings --- src/gallium/drivers/llvmpipe/lp_setup.c | 9 --------- src/gallium/drivers/llvmpipe/lp_setup.h | 1 + 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 0a12d93c38..030a19ef30 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -493,15 +493,6 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } -static void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ) -{ - - -} - - /* Stubs for lines & points for now: */ void diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index ac9c3cc0ee..0dedc9e9fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,6 +50,7 @@ struct lp_shader_input { struct pipe_texture; struct pipe_surface; +struct pipe_buffer; struct pipe_blend_color; struct setup_context; struct lp_fragment_shader; -- cgit v1.2.3 From b0475a4b0d1eaa1179bc399301ed46b0b8e63497 Mon Sep 17 00:00:00 2001 
From: Keith Whitwell Date: Fri, 9 Oct 2009 13:55:04 +0100 Subject: llvmpipe: fix crash on init --- src/gallium/drivers/llvmpipe/lp_setup.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 030a19ef30..2eef63badc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -229,8 +229,8 @@ begin_binning( struct setup_context *setup ) setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILESIZE); - setup->tiles_y = align(setup->fb.height, TILESIZE); + setup->tiles_x = align(setup->fb.width, TILESIZE) / TILESIZE; + setup->tiles_y = align(setup->fb.height, TILESIZE) / TILESIZE; if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) @@ -312,18 +312,10 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { - unsigned width, height; - set_state( setup, SETUP_FLUSHED ); pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); - - width = MAX2( color->width, zstencil->width ); - height = MAX2( color->height, zstencil->height ); - - setup->tiles_x = align( width, TILESIZE ) / TILESIZE; - setup->tiles_y = align( height, TILESIZE ) / TILESIZE; } void -- cgit v1.2.3 From 4e1334ced68dd25b151250a44af25e8e0d5a33fe Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:02:39 +0100 Subject: llvmpipe: debug, crash fixes --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 +++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 6 ------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index de15ddbb2e..fff292e294 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,6 +33,7 @@ #include "lp_tile_soa.h" #include 
"lp_bld_debug.h" +#define RAST_DEBUG debug_printf struct lp_rasterizer *lp_rast_create( void ) { @@ -53,6 +54,8 @@ void lp_rast_begin( struct lp_rasterizer *rast, unsigned width, unsigned height ) { + RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + rast->width = width; rast->height = height; rast->check_for_clipped_tiles = (width % TILESIZE != 0 || @@ -63,6 +66,8 @@ void lp_rast_bind_color( struct lp_rasterizer *rast, struct pipe_surface *cbuf, boolean write_color ) { + RAST_DEBUG("%s\n", __FUNCTION__); + pipe_surface_reference(&rast->state.cbuf, cbuf); rast->state.write_color = write_color; } @@ -71,6 +76,8 @@ void lp_rast_bind_zstencil( struct lp_rasterizer *rast, struct pipe_surface *zsbuf, boolean write_zstencil ) { + RAST_DEBUG("%s\n", __FUNCTION__); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); rast->state.write_zstencil = write_zstencil; } @@ -82,6 +89,8 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { + RAST_DEBUG("%s\n", __FUNCTION__); + rast->x = x; rast->y = y; } @@ -91,6 +100,8 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; + RAST_DEBUG("%s\n", __FUNCTION__); + if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { @@ -110,6 +121,8 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; + RAST_DEBUG("%s\n", __FUNCTION__); + for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; @@ -119,12 +132,16 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* call u_tile func to load colors from surface */ } void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* call u_tile func to load depth (and stencil?) 
from surface */ } @@ -133,8 +150,9 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { - rast->shader_state = arg.set_state; + RAST_DEBUG("%s\n", __FUNCTION__); + rast->shader_state = arg.set_state; } @@ -145,6 +163,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; + RAST_DEBUG("%s\n", __FUNCTION__); + /* Use the existing preference for 8x2 (four quads) shading: */ for (y = 0; y < TILE_SIZE; y += 2) @@ -218,6 +238,8 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) unsigned h = TILE_SIZE; void *map; + RAST_DEBUG("%s\n", __FUNCTION__); + surface = rast->state.cbuf; if(!surface) return; @@ -256,12 +278,16 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* FIXME: call u_tile func to store depth/stencil to surface */ } void lp_rast_end_tile( struct lp_rasterizer *rast ) { + RAST_DEBUG("%s\n", __FUNCTION__); + if (rast->state.write_color) lp_rast_store_color(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 2eef63badc..009c641976 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -164,12 +164,6 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; - if (setup->state != SETUP_ACTIVE) { - /* this can happen, not a big deal */ - debug_printf("%s called when not binning\n", __FUNCTION__); - return; - } - lp_rast_begin( rast, setup->fb.width, setup->fb.height ); -- cgit v1.2.3 From 659609e0ae27071a601794935c85547e315dedeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 14:03:11 +0100 Subject: llvmpipe: Replace util_pack_color with straight float_to_ubyte. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 009c641976..ec1027bb40 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -319,10 +319,11 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + unsigned i; + if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(color, - setup->fb.cbuf->format, - &setup->clear.color.clear_color ); + for (i = 0; i < 4; ++i) + setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { -- cgit v1.2.3 From 295aea04895676aae5b67a7016c62bab8e40b996 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:07:25 +0100 Subject: llvmpipe: more debug --- src/gallium/drivers/llvmpipe/lp_rast.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index fff292e294..beb149ef18 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -89,7 +89,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); rast->x = x; rast->y = y; @@ -100,7 +100,11 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, + clear_color[0], + clear_color[1], + clear_color[2], + clear_color[3]); if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -238,7 +242,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) unsigned h = TILE_SIZE; void *map; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); surface = 
rast->state.cbuf; if(!surface) -- cgit v1.2.3 From e0e2008f1dcd73a59a184e0ef4c1dd77ac2a1cbf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:29:25 +0100 Subject: llvmpipe: trivial/clear works --- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 120 ++++++++++++++-------------- src/gallium/drivers/llvmpipe/lp_rast.h | 21 +++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 11 ++- src/gallium/drivers/llvmpipe/lp_setup.c | 16 ++-- src/gallium/drivers/llvmpipe/lp_setup.h | 3 +- 6 files changed, 92 insertions(+), 81 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 7f7b04412c..06aa032540 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -179,7 +179,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create(); + llvmpipe->setup = lp_setup_create( screen ); if (!llvmpipe->setup) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index beb149ef18..977f35c46c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -35,7 +35,7 @@ #define RAST_DEBUG debug_printf -struct lp_rasterizer *lp_rast_create( void ) +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { struct lp_rasterizer *rast; @@ -43,6 +43,7 @@ struct lp_rasterizer *lp_rast_create( void ) if(!rast) return NULL; + rast->screen = screen; rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); @@ -50,39 +51,75 @@ struct lp_rasterizer *lp_rast_create( void ) } -void lp_rast_begin( struct lp_rasterizer *rast, - unsigned width, - unsigned height ) +boolean lp_rast_begin( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, + 
boolean write_color, + boolean write_zstencil, + unsigned width, + unsigned height ) { + struct pipe_screen *screen = rast->screen; + RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + pipe_surface_reference(&rast->state.cbuf, cbuf); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); + rast->width = width; rast->height = height; + rast->state.write_zstencil = write_zstencil; + rast->state.write_color = write_color; + rast->check_for_clipped_tiles = (width % TILESIZE != 0 || height % TILESIZE != 0); -} -void lp_rast_bind_color( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - boolean write_color ) -{ - RAST_DEBUG("%s\n", __FUNCTION__); + if (cbuf) { + rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, width, height); + if (!rast->cbuf_transfer) + return FALSE; + + rast->cbuf_map = screen->transfer_map(rast->screen, + rast->cbuf_transfer); + if (!rast->cbuf_map) + return FALSE; + } - pipe_surface_reference(&rast->state.cbuf, cbuf); - rast->state.write_color = write_color; + return TRUE; } -void lp_rast_bind_zstencil( struct lp_rasterizer *rast, - struct pipe_surface *zsbuf, - boolean write_zstencil ) + +void lp_rast_end( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + struct pipe_screen *screen = rast->screen; - pipe_surface_reference(&rast->state.zsbuf, zsbuf); - rast->state.write_zstencil = write_zstencil; + if (rast->cbuf_map) + screen->transfer_unmap(screen, rast->cbuf_transfer); + + if (rast->zsbuf_map) + screen->transfer_unmap(screen, rast->zsbuf_transfer); + + if (rast->cbuf_transfer) + screen->tex_transfer_destroy(rast->cbuf_transfer); + + if (rast->zsbuf_transfer) + screen->tex_transfer_destroy(rast->cbuf_transfer); + + rast->cbuf_transfer = NULL; + rast->zsbuf_transfer = NULL; + rast->cbuf_map = NULL; + rast->zsbuf_map = NULL; } + + /* Begining of each tile: */ void lp_rast_start_tile( struct lp_rasterizer *rast, 
@@ -233,50 +270,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, static void lp_rast_store_color( struct lp_rasterizer *rast ) { - struct pipe_surface *surface; - struct pipe_screen *screen; - struct pipe_transfer *transfer; const unsigned x = rast->x; const unsigned y = rast->y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - void *map; - - RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - - surface = rast->state.cbuf; - if(!surface) - return; - - screen = surface->texture->screen; - - if(x + w > rast->width) - w = rast->width - x; - if(y + h > rast->height) - h = rast->height - y; - - transfer = screen->get_tex_transfer(screen, - surface->texture, - surface->face, - surface->level, - surface->zslice, - PIPE_TRANSFER_READ_WRITE, - x, y, w, h); - if(!transfer) - return; - - map = screen->transfer_map(screen, transfer); - if(map) { - lp_tile_write_4ub(transfer->format, - rast->tile.color, - map, transfer->stride, - x, y, w, h); - - screen->transfer_unmap(screen, transfer); - } - screen->tex_transfer_destroy(transfer); + RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + lp_tile_write_4ub(rast->cbuf_transfer->format, + rast->tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + TILESIZE, TILESIZE); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 72f897503d..9dfdf25cda 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -37,6 +37,7 @@ * individual function calls like this. 
*/ struct lp_rasterizer; +struct pipe_screen; #define TILESIZE 64 @@ -118,19 +119,17 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( void ); +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); -void lp_rast_begin( struct lp_rasterizer *, - unsigned width, - unsigned height); +boolean lp_rast_begin( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, + boolean write_color, + boolean write_zstencil, + unsigned width, + unsigned height ); -void lp_rast_bind_color( struct lp_rasterizer *, - struct pipe_surface *cbuf, - boolean write_when_done ); - -void lp_rast_bind_zstencil( struct lp_rasterizer *, - struct pipe_surface *zsbuf, - boolean write_when_done ); +void lp_rast_end( struct lp_rasterizer * ); /* Begining of each tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f5a6699ed4..eae8138aaf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,8 @@ #include "lp_rast.h" +struct pipe_transfer; +struct pipe_screen; /* We can choose whatever layout for the internal tile storage we * prefer: @@ -49,7 +51,6 @@ struct lp_rasterizer { */ struct lp_rast_tile tile; - unsigned x; unsigned y; boolean clipped_tile; @@ -57,7 +58,13 @@ struct lp_rasterizer { boolean check_for_clipped_tiles; unsigned width; unsigned height; - + + struct pipe_screen *screen; + struct pipe_transfer *cbuf_transfer; + struct pipe_transfer *zsbuf_transfer; + void *cbuf_map; + void *zsbuf_map; + struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ec1027bb40..ba9d801032 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -165,16 +165,14 @@ rasterize_bins( struct setup_context *setup, unsigned i,j,k; lp_rast_begin( rast, + setup->fb.cbuf, + 
setup->fb.zsbuf, + setup->fb.cbuf != NULL, + setup->fb.zsbuf != NULL && write_depth, setup->fb.width, setup->fb.height ); - lp_rast_bind_color( rast, - setup->fb.cbuf, - setup->fb.cbuf != NULL ); - lp_rast_bind_zstencil( rast, - setup->fb.zsbuf, - setup->fb.zsbuf != NULL && write_depth ); for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -193,6 +191,8 @@ rasterize_bins( struct setup_context *setup, } } + lp_rast_end( rast ); + reset_context( setup ); } @@ -528,12 +528,12 @@ lp_setup_destroy( struct setup_context *setup ) * rasterizer to use with it. */ struct setup_context * -lp_setup_create( void ) +lp_setup_create( struct pipe_screen *screen ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i, j; - setup->rast = lp_rast_create(); + setup->rast = lp_rast_create( screen ); if (!setup->rast) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0dedc9e9fe..1edd7410fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -52,12 +52,13 @@ struct pipe_texture; struct pipe_surface; struct pipe_buffer; struct pipe_blend_color; +struct pipe_screen; struct setup_context; struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( void ); +lp_setup_create( struct pipe_screen *screen ); void lp_setup_clear(struct setup_context *setup, -- cgit v1.2.3 From 608c22272327d3b554c7665b60f6322716e5fd9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 14:30:25 +0100 Subject: llvmpipe: Put jit_context in store. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 26 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 ++ 2 files changed, 28 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ba9d801032..8a9c169634 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -480,12 +480,36 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } +static INLINE void +lp_setup_update_shader_state( struct setup_context *setup ) +{ + + if(setup->fs.jit_context_dirty) { + if(!setup->fs.last_jc || + memcmp(setup->fs.last_jc, &setup->fs.jit_context, sizeof *setup->fs.last_jc)) { + struct lp_jit_context *jc; + + jc = get_data(&setup->data, sizeof *jc); + if(jc) { + memcpy(jc, &setup->fs.jit_context, sizeof *jc); + setup->fs.last_jc = jc; + } + } + + setup->fs.jit_context_dirty = FALSE; + } + + assert(setup->fs.last_jc); +} + + /* Stubs for lines & points for now: */ void lp_setup_point(struct setup_context *setup, const float (*v0)[4]) { + lp_setup_update_shader_state(setup); setup->point( setup, v0 ); } @@ -494,6 +518,7 @@ lp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { + lp_setup_update_shader_state(setup); setup->line( setup, v0, v1 ); } @@ -503,6 +528,7 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 2e2380dd80..747e90fe20 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -114,6 +114,8 @@ struct setup_context { lp_jit_frag_func jit_function; boolean jit_context_dirty; + + const struct lp_jit_context *last_jc; } fs; void (*point)( struct setup_context *, -- cgit v1.2.3 From 
163a31952c903034c8a70213b344e1b2ef287270 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:33:57 +0100 Subject: llvmpipe: initialize setup line/tri/point funcs --- src/gallium/drivers/llvmpipe/lp_setup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8a9c169634..47839869ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -568,6 +568,10 @@ lp_setup_create( struct pipe_screen *screen ) setup->tile[i][j].head = setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->triangle = first_triangle; + setup->line = first_line; + setup->point = first_point; + return setup; fail: -- cgit v1.2.3 From 082b3b0a895615a60a7eae40fea14bf231960dba Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:36:41 +0100 Subject: llvmpipe: initialize setup data store --- src/gallium/drivers/llvmpipe/lp_setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 47839869ac..336a8b4e5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -568,6 +568,9 @@ lp_setup_create( struct pipe_screen *screen ) setup->tile[i][j].head = setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->data.head = + setup->data.tail = CALLOC_STRUCT(data_block); + setup->triangle = first_triangle; setup->line = first_line; setup->point = first_point; -- cgit v1.2.3 From dec35d04aeb398eef159aaf8cde5e0d04622b811 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:59:35 +0100 Subject: llvmpipe: add LP_DEBUG env var --- src/gallium/drivers/llvmpipe/lp_debug.h | 71 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_screen.c | 22 +++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 67 +++++++++++++--------------- 3 files changed, 124 insertions(+), 36 deletions(-) 
create mode 100644 src/gallium/drivers/llvmpipe/lp_debug.h diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h new file mode 100644 index 0000000000..74b2757494 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_DEBUG_H +#define LP_DEBUG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + +extern void +st_print_current(void); + + +#define DEBUG_PIPE 0x1 +#define DEBUG_TGSI 0x2 +#define DEBUG_TEX 0x4 +#define DEBUG_ASM 0x8 +#define DEBUG_SETUP 0x10 +#define DEBUG_RAST 0x20 +#define DEBUG_QUERY 0x40 +#define DEBUG_SCREEN 0x80 +#define DEBUG_JIT 0x100 + +#ifdef DEBUG +extern int LP_DEBUG; +#else +#define LP_DEBUG 0 +#endif + +void st_debug_init( void ); + +static INLINE void +LP_DBG( unsigned flag, const char *fmt, ... ) +{ + if (LP_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + + +#endif /* LP_DEBUG_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0518927458..87fddbd13f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -35,6 +35,24 @@ #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" +#include "lp_debug.h" + +#ifdef DEBUG +int LP_DEBUG = 0; + +static const struct debug_named_value lp_debug_flags[] = { + { "pipe", DEBUG_PIPE }, + { "tgsi", DEBUG_TGSI }, + { "tex", DEBUG_TEX }, + { "asm", DEBUG_ASM }, + { "setup", DEBUG_SETUP }, + { "rast", DEBUG_RAST }, + { "query", DEBUG_QUERY }, + { "screen", DEBUG_SCREEN }, + { "jit", DEBUG_JIT }, + {NULL, 0} +}; +#endif static const char * @@ -213,6 +231,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); +#ifdef DEBUG + LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); +#endif + if (!screen) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 63e675e584..a12581a486 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -86,6 
+86,7 @@ #include "lp_buffer.h" #include "lp_state.h" #include "lp_tex_sample.h" +#include "lp_debug.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -396,29 +397,29 @@ generate_fragment(struct llvmpipe_context *lp, unsigned i; unsigned chan; -#ifdef DEBUG - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + if (LP_DEBUG & DEBUG_JIT) { + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.blend_enable) { + debug_printf("blend.rgb_func = %s\n", 
debug_dump_blend_func (key->blend.rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); -#endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) @@ -509,13 +510,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -558,8 +554,8 @@ generate_fragment(struct llvmpipe_context *lp, } lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + fs_mask, num_fs, + &blend_mask, 1); /* * Blending. 
@@ -588,16 +584,15 @@ generate_fragment(struct llvmpipe_context *lp, LLVMRunFunctionPassManager(screen->pass, variant->function); -#ifdef DEBUG - LLVMDumpValue(variant->function); - debug_printf("\n"); -#endif + if (LP_DEBUG & DEBUG_JIT) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); -#ifdef DEBUG - lp_disassemble(variant->jit_function); -#endif + if (LP_DEBUG & DEBUG_ASM) + lp_disassemble(variant->jit_function); variant->next = shader->variants; shader->variants = variant; -- cgit v1.2.3 From 402c189af7e95be99ba2e5fd71a71987ffd73c2f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:07:18 +0100 Subject: llvmpipe: always call begin_binning on transition to active state --- src/gallium/drivers/llvmpipe/lp_setup.c | 39 +++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 336a8b4e5b..793b71e095 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -42,6 +42,8 @@ #include "lp_texture.h" #include "lp_setup_context.h" +#define SETUP_DEBUG debug_printf + static void set_state( struct setup_context *, unsigned ); void lp_setup_new_cmd_block( struct cmd_block_list *list ) @@ -97,6 +99,8 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; + SETUP_DEBUG("%s\n", __FUNCTION__); + /* Free binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { @@ -164,6 +168,8 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; + SETUP_DEBUG("%s\n", __FUNCTION__); + lp_rast_begin( rast, setup->fb.cbuf, setup->fb.zsbuf, @@ -201,6 +207,8 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + if (!setup->fb.cbuf && 
!setup->fb.zsbuf) { setup->fb.width = 0; setup->fb.height = 0; @@ -254,6 +262,8 @@ begin_binning( struct setup_context *setup ) static void execute_clears( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + begin_binning( setup ); rasterize_bins( setup, TRUE ); } @@ -268,10 +278,11 @@ set_state( struct setup_context *setup, if (old_state == new_state) return; + SETUP_DEBUG("%s old %d new %d\n", __FUNCTION__, old_state, new_state); + switch (new_state) { case SETUP_ACTIVE: - if (old_state == SETUP_FLUSHED) - begin_binning( setup ); + begin_binning( setup ); break; case SETUP_CLEARED: @@ -297,6 +308,8 @@ void lp_setup_flush( struct setup_context *setup, unsigned flags ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + set_state( setup, SETUP_FLUSHED ); } @@ -306,6 +319,8 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + set_state( setup, SETUP_FLUSHED ); pipe_surface_reference( &setup->fb.cbuf, color ); @@ -321,6 +336,9 @@ lp_setup_clear( struct setup_context *setup, { unsigned i; + SETUP_DEBUG("%s state %d\n", __FUNCTION__, setup->state); + + if (flags & PIPE_CLEAR_COLOR) { for (i = 0; i < 4; ++i) setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); @@ -368,6 +386,8 @@ lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface) { + SETUP_DEBUG("%s\n", __FUNCTION__); + setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; @@ -380,6 +400,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; } @@ -388,6 +410,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { + SETUP_DEBUG("%s\n", __FUNCTION__); /* FIXME: reference count */ 
setup->fs.jit_function = fs->current->jit_function; @@ -400,6 +423,8 @@ lp_setup_set_fs_constants(struct setup_context *setup, const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; struct pipe_buffer *dummy; + SETUP_DEBUG("%s\n", __FUNCTION__); + /* FIXME: hold on to the reference */ dummy = NULL; pipe_buffer_reference(&dummy, buffer); @@ -414,6 +439,8 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.jit_context.alpha_ref_value = alpha_ref_value; setup->fs.jit_context_dirty = TRUE; @@ -426,6 +453,8 @@ lp_setup_set_blend_color( struct setup_context *setup, { unsigned i, j; + SETUP_DEBUG("%s\n", __FUNCTION__); + if(!setup->fs.jit_context.blend_color) setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); @@ -445,6 +474,9 @@ lp_setup_set_sampler_textures( struct setup_context *setup, struct pipe_texture *dummy; unsigned i; + SETUP_DEBUG("%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { @@ -483,6 +515,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); if(setup->fs.jit_context_dirty) { if(!setup->fs.last_jc || @@ -528,6 +561,8 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + SETUP_DEBUG("%s\n", __FUNCTION__); + lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); } -- cgit v1.2.3 From da1808ccc9a53fdd5aa69efa800ece0d3d075b07 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:10:15 +0100 Subject: llvmpipe: just bin whole tiles for now --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 ++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c 
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 7110afb9d5..1041cd2463 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -188,6 +188,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, float x0, y0; float c1, c2, c3; + debug_printf("%s\n", __FUNCTION__); + if (miny == maxy || minx == maxx) { debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); return; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5c402259df..857fb6a9f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -421,10 +421,15 @@ do_triangle_ccw(struct setup_context *setup, } else { +#if 1 + bin_command( &setup->tile[x][y], lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); +#else /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); +#endif } /* Iterate cx values across the region: -- cgit v1.2.3 From f406ffaea62005157f56ea17709291326c4dca8a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:29:10 +0100 Subject: llvmpipe: set block count/used values back to zero on reset --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 793b71e095..03c54798dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -115,6 +115,7 @@ static void reset_context( struct setup_context *setup ) } list->head = list->tail; + list->head->count = 0; } } @@ -130,6 +131,7 @@ static void reset_context( struct setup_context *setup ) } list->head = list->tail; + list->head->used = 0; } /* Reset some state: -- cgit v1.2.3 From 85999695829823e459e11822b4846ed1db5c055d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 15:52:18 +0100 Subject: llvmpipe: Get 
jit_context/jit_function across the rasterizer. --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 +++++-------- src/gallium/drivers/llvmpipe/lp_rast.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 - src/gallium/drivers/llvmpipe/lp_setup.c | 53 ++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 ++--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 ++ 7 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 977f35c46c..cba50c8049 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -188,14 +188,6 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* Within a tile: */ -void lp_rast_set_state( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg arg ) -{ - RAST_DEBUG("%s\n", __FUNCTION__); - - rast->shader_state = arg.set_state; -} - void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) @@ -219,7 +211,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, const unsigned *masks) { - const struct lp_rast_state *state = rast->shader_state; + const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; @@ -249,17 +241,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(state->jc.blend_color, 16)); + assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader */ - state->shader( &state->jc, - x, y, - inputs->a0, - inputs->dadx, - inputs->dady, - &mask[0][0], - color, - depth); + state->jit_function( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + &mask[0][0], + color, + depth); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h 
b/src/gallium/drivers/llvmpipe/lp_rast.h index 9dfdf25cda..f371b709df 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -45,12 +45,12 @@ struct pipe_screen; struct lp_rast_state { /* State for the shader: */ - struct lp_jit_context jc; + struct lp_jit_context jit_context; /* The shader itself. Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: */ - lp_jit_frag_func shader; + lp_jit_frag_func jit_function; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index eae8138aaf..11e8e78e79 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -74,8 +74,6 @@ struct lp_rasterizer { unsigned clear_depth; char clear_stencil; } state; - - const struct lp_rast_state *shader_state; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 03c54798dc..428d2d0085 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -415,7 +415,7 @@ lp_setup_set_fs( struct setup_context *setup, SETUP_DEBUG("%s\n", __FUNCTION__); /* FIXME: reference count */ - setup->fs.jit_function = fs->current->jit_function; + setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; } void @@ -431,9 +431,9 @@ lp_setup_set_fs_constants(struct setup_context *setup, dummy = NULL; pipe_buffer_reference(&dummy, buffer); - setup->fs.jit_context.constants = data; + setup->fs.current.jit_context.constants = data; - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } @@ -443,9 +443,9 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, { SETUP_DEBUG("%s\n", __FUNCTION__); - if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { - setup->fs.jit_context.alpha_ref_value = alpha_ref_value; - setup->fs.jit_context_dirty = TRUE; + if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; + setup->fs.dirty = TRUE; } } @@ -457,16 +457,16 @@ lp_setup_set_blend_color( struct setup_context *setup, SETUP_DEBUG("%s\n", __FUNCTION__); - if(!setup->fs.jit_context.blend_color) - setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); + if(!setup->fs.current.jit_context.blend_color) + setup->fs.current.jit_context.blend_color = align_malloc(4 * 16, 16); for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(blend_color->color[i]); for (j = 0; j < 16; ++j) - setup->fs.jit_context.blend_color[i*4 + j] = c; + setup->fs.current.jit_context.blend_color[i*4 + j] = c; } - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } void @@ -490,7 +490,8 @@ lp_setup_set_sampler_textures( struct setup_context *setup, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); - struct lp_jit_texture *jit_tex = &setup->fs.jit_context.textures[i]; + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; jit_tex->width = tex->width[0]; jit_tex->height = tex->height[0]; jit_tex->stride = lp_tex->stride[0]; @@ -502,7 +503,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, } } - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } boolean @@ -519,22 +520,28 @@ 
lp_setup_update_shader_state( struct setup_context *setup ) { SETUP_DEBUG("%s\n", __FUNCTION__); - if(setup->fs.jit_context_dirty) { - if(!setup->fs.last_jc || - memcmp(setup->fs.last_jc, &setup->fs.jit_context, sizeof *setup->fs.last_jc)) { - struct lp_jit_context *jc; - - jc = get_data(&setup->data, sizeof *jc); - if(jc) { - memcpy(jc, &setup->fs.jit_context, sizeof *jc); - setup->fs.last_jc = jc; + assert(setup->fs.current.jit_function); + + if(setup->fs.dirty) { + if(!setup->fs.stored || + memcmp(setup->fs.stored, + &setup->fs.current, + sizeof setup->fs.current) != 0) { + struct lp_rast_state *stored; + + stored = get_data(&setup->data, sizeof *stored); + if(stored) { + memcpy(stored, + &setup->fs.current, + sizeof setup->fs.current); + setup->fs.stored = stored; } } - setup->fs.jit_context_dirty = FALSE; + setup->fs.dirty = FALSE; } - assert(setup->fs.last_jc); + assert(setup->fs.stored); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 747e90fe20..c15a59e4d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -110,12 +110,9 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - struct lp_jit_context jit_context; - lp_jit_frag_func jit_function; - - boolean jit_context_dirty; - - const struct lp_jit_context *last_jc; + const struct lp_rast_state *stored; + struct lp_rast_state current; + boolean dirty; } fs; void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 857fb6a9f8..78e53292ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -272,6 +272,8 @@ do_triangle_ccw(struct setup_context *setup, float c1, c2, c3; int minx, maxx, miny, maxy; + tri->inputs.state = setup->fs.stored; + tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; tri->dx31 = 
x3 - x1; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index a12581a486..0541d36580 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -84,6 +84,7 @@ #include "lp_screen.h" #include "lp_context.h" #include "lp_buffer.h" +#include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" #include "lp_debug.h" @@ -765,4 +766,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) variant = generate_fragment(lp, shader, &key); shader->current = variant; + + lp_setup_set_fs(lp->setup, shader); } -- cgit v1.2.3 From 82ec7f018d20e46e9c43ea467354dcfe4f03bae3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:05:26 +0100 Subject: llvmpipe: correct binning maths for iterating over whole tiles --- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12 ++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index cba50c8049..d4f369d4d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -192,6 +192,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { +#if 0 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; @@ -203,6 +204,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) lp_rast_shade_quads( rast, inputs, x, y, masks); +#else + RAST_DEBUG("%s\n", __FUNCTION__); + memset(rast->tile.color, 0x80, TILE_SIZE * TILE_SIZE * 4); +#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 78e53292ec..c437940381 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -357,13 +357,13 @@ do_triangle_ccw(struct setup_context *setup, c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - /* Convert to tile coordinates: - */ minx /= TILESIZE; - maxx /= TILESIZE; miny /= TILESIZE; + maxx /= TILESIZE; maxy /= TILESIZE; - + + /* Convert to tile coordinates: + */ if (miny == maxy && minx == maxx) { /* Triangle is contained in a single tile: @@ -399,13 +399,13 @@ do_triangle_ccw(struct setup_context *setup, * Trivially accept or reject blocks, else jump to per-pixel * examination above. */ - for (y = miny; y < maxy; y++) + for (y = miny; y <= maxy; y++) { float cx1 = c1; float cx2 = c2; float cx3 = c3; - for (x = minx; x < maxx; x++) + for (x = minx; x <= maxx; x++) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From b0cd386e777912df115858d90f2eec31811c2d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 16:18:03 +0100 Subject: llvmpipe: Tell setup shader inputs. 
--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 00903c8ef4..a18efcc0e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -144,6 +144,36 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } draw_compute_vertex_size(vinfo); + + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + + for (i = 0; i < lpfs->info.num_inputs; i++) { + switch (vinfo->attrib[i].interp_mode) { + case INTERP_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; + break; + case INTERP_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; + break; + case INTERP_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + case INTERP_POS: + inputs[i].interp = LP_INTERP_POSITION; + break; + default: + assert(0); + } + + if (lpfs->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + inputs[i].interp = LP_INTERP_FACING; + + inputs[i].src_index = vinfo->attrib[i].src_index; + } + + lp_setup_set_fs_inputs(llvmpipe->setup, inputs, lpfs->info.num_inputs); + } } return vinfo; -- cgit v1.2.3 From c2e926b72de21bfac0048f32e1204537446d5ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 16:18:19 +0100 Subject: llvmpipe: Pass framebuffer coords to shader. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d4f369d4d0..e73331535f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -250,7 +250,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* run shader */ state->jit_function( &state->jit_context, - x, y, + rast->x + x, rast->y + y, inputs->a0, inputs->dadx, inputs->dady, -- cgit v1.2.3 From 05131f7502150968d7ee19673676f74d4c2fd22b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:19:00 +0100 Subject: llvmpipe: properly clip tile writes --- src/gallium/drivers/llvmpipe/lp_rast.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index e73331535f..3585011ace 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -269,15 +269,23 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; const unsigned y = rast->y; + unsigned w = TILESIZE; + unsigned h = TILESIZE; - RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + if (x + w > rast->width) + w -= x + w - rast->width; + + if (y + h > rast->height) + h -= y + h - rast->height; + + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, x, y, - TILESIZE, TILESIZE); + w, h); } -- cgit v1.2.3 From 8c34c86d191fc703670d4e1e1ae4719cb39f8828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 16:21:25 +0100 Subject: llvmpipe: Undo debug override. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3585011ace..38c3aea921 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -192,7 +192,6 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { -#if 0 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; @@ -204,10 +203,6 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) lp_rast_shade_quads( rast, inputs, x, y, masks); -#else - RAST_DEBUG("%s\n", __FUNCTION__); - memset(rast->tile.color, 0x80, TILE_SIZE * TILE_SIZE * 4); -#endif } -- cgit v1.2.3 From 6464ec48366fee201c61a481c3205a64279797b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 16:50:32 +0100 Subject: llvmpipe: Remove partial tile override. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index c437940381..3cb7a28604 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -423,15 +423,10 @@ do_triangle_ccw(struct setup_context *setup, } else { -#if 1 - bin_command( &setup->tile[x][y], lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); -#else /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); -#endif } /* Iterate cx values across the region: -- cgit v1.2.3 From b4924d62c7346da2e0de9ae4f9f23b3fb7fafee8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:59:24 +0100 Subject: llvmpipe: fill in tri min/max values --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 3cb7a28604..1725614902 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -306,6 +306,11 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) return; + tri->miny = miny; + tri->minx = minx; + tri->maxy = maxy; + tri->maxx = maxx; + /* The only divide in this code. Is it really needed? */ tri->oneoverarea = 1.0f / area; -- cgit v1.2.3 From 8c80413360855106734068066382be8c3a46a64f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 17:14:11 +0100 Subject: llvmpipe: Fix type in tri bounding box check. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 1041cd2463..6b5bee4af3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -179,7 +179,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, /* Clamp to tile dimensions: */ - int minx = MAX2(tri->maxx, rast->x); + int minx = MAX2(tri->minx, rast->x); int miny = MAX2(tri->miny, rast->y); int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); -- cgit v1.2.3 From 61f3eeb6403e404d297bdcd924c215ed36060945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 19:16:36 +0100 Subject: llvmpipe: Use framebuffer coords consistently. --- src/gallium/drivers/llvmpipe/lp_rast.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 38c3aea921..2038403c8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -202,7 +202,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) - lp_rast_shade_quads( rast, inputs, x, y, masks); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, masks); } @@ -211,6 +211,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, const unsigned *masks) { +#if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; @@ -218,23 +219,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; + unsigned ix, iy; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); + ix = x % 
TILE_SIZE; + iy = y % TILE_SIZE; + /* mask */ for (q = 0; q < 4; ++q) for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; /* color buffer */ - color = &TILE_PIXEL(tile->color, x, y, 0); + color = &TILE_PIXEL(tile->color, ix, iy, 0); /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = tile->depth + y*TILE_SIZE + 2*x; + depth = tile->depth + iy*TILE_SIZE + 2*ix; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -245,14 +250,30 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* run shader */ state->jit_function( &state->jit_context, - rast->x + x, rast->y + y, + x, y, inputs->a0, inputs->dadx, inputs->dady, &mask[0][0], color, depth); +#else + struct lp_rast_tile *tile = &rast->tile; + unsigned chan_index; + unsigned q, ix, iy; + + x %= TILE_SIZE; + y %= TILE_SIZE; + + /* mask */ + for (q = 0; q < 4; ++q) + for(iy = 0; iy < 2; ++iy) + for(ix = 0; ix < 2; ++ix) + if(masks[q] & (1 << (iy*2 + ix))) + for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) + TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff; +#endif } -- cgit v1.2.3 From 7908c239e0fdc11d878b8c68d126c3364af0ee24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 9 Oct 2009 19:17:30 +0100 Subject: llvmpipe: Additional checks for binner block lists. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 428d2d0085..a74756de7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,7 +101,7 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); - /* Free binner command lists: + /* Free all but last binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -114,12 +114,13 @@ static void reset_context( struct setup_context *setup ) FREE(block); } + assert(list->tail->next == NULL); list->head = list->tail; list->head->count = 0; } } - /* Free binned data: + /* Free all but last binned data block: */ { struct data_block_list *list = &setup->data; @@ -130,6 +131,7 @@ static void reset_context( struct setup_context *setup ) FREE(block); } + assert(list->tail->next == NULL); list->head = list->tail; list->head->used = 0; } @@ -588,6 +590,8 @@ lp_setup_destroy( struct setup_context *setup ) for (j = 0; j < TILES_Y; j++) FREE(setup->tile[i][j].head); + FREE(setup->data.head); + lp_rast_destroy( setup->rast ); FREE( setup ); } -- cgit v1.2.3 From 0177c6e66cfddeb62feca86e7bd5ae763b9b5244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Sat, 10 Oct 2009 18:44:46 +0100 Subject: llvmpipe: Only invoke the shader if necessary. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 6b5bee4af3..f096972d63 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -140,7 +140,8 @@ do_block( struct lp_rasterizer *rast, cx3 += xstep3; } - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + if(masks[0] || masks[1] || masks[2] || masks[3]) + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); c1 += ystep1; c2 += ystep2; -- cgit v1.2.3 From 2e3580d994e2caf6d81763803c8525a7ed42b8fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Sun, 18 Oct 2009 11:57:43 +0100 Subject: llvmpipe: Maintain a copy of the shader constants to prevent clobbering. --- src/gallium/drivers/llvmpipe/lp_setup.c | 64 +++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 13 ++++- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index a74756de7c..08dac459db 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,6 +101,11 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); + /* Reset derived data */ + pipe_buffer_reference(&setup->constants.current, NULL); + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + /* Free all but last binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { @@ -424,18 +429,11 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - const void *data = buffer ? 
llvmpipe_buffer(buffer)->data : NULL; - struct pipe_buffer *dummy; - SETUP_DEBUG("%s\n", __FUNCTION__); - /* FIXME: hold on to the reference */ - dummy = NULL; - pipe_buffer_reference(&dummy, buffer); + pipe_buffer_reference(&setup->constants.current, buffer); - setup->fs.current.jit_context.constants = data; - - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_CONSTANTS; } @@ -447,7 +445,7 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } } @@ -468,7 +466,7 @@ lp_setup_set_blend_color( struct setup_context *setup, setup->fs.current.jit_context.blend_color[i*4 + j] = c; } - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } void @@ -505,7 +503,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, } } - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } boolean @@ -524,7 +522,43 @@ lp_setup_update_shader_state( struct setup_context *setup ) assert(setup->fs.current.jit_function); - if(setup->fs.dirty) { + if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { + struct pipe_buffer *buffer = setup->constants.current; + + if(buffer) { + unsigned current_size = buffer->size; + const void *current_data = llvmpipe_buffer(buffer)->data; + + /* TODO: copy only the actually used constants? 
*/ + + if(setup->constants.stored_size != current_size || + !setup->constants.stored_data || + memcmp(setup->constants.stored_data, + current_data, + current_size) != 0) { + void *stored; + + stored = get_data(&setup->data, current_size); + if(stored) { + memcpy(stored, + current_data, + current_size); + setup->constants.stored_size = current_size; + setup->constants.stored_data = stored; + } + } + } + else { + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + } + + setup->fs.current.jit_context.constants = setup->constants.stored_data; + setup->dirty |= LP_SETUP_NEW_FS; + } + + + if(setup->dirty & LP_SETUP_NEW_FS) { if(!setup->fs.stored || memcmp(setup->fs.stored, &setup->fs.current, @@ -539,10 +573,10 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; } } - - setup->fs.dirty = FALSE; } + setup->dirty = 0; + assert(setup->fs.stored); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index c15a59e4d1..82ec71f100 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -43,6 +43,10 @@ #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 + + /* switch to a non-pointer value for this: */ typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); @@ -112,9 +116,16 @@ struct setup_context { const struct lp_rast_state *stored; struct lp_rast_state current; - boolean dirty; } fs; + struct { + struct pipe_buffer *current; + unsigned stored_size; + const void *stored_data; + } constants; + + unsigned dirty; + void (*point)( struct setup_context *, const float (*v0)[4]); -- cgit v1.2.3 From d9f44abe3bb0c9897937ef7f343a7896a0b4cbf0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 11:23:43 +0100 Subject: llvmpipe: fix typo correcting for fill convention 
Adjustments for top-left fill convention were being lost. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 1725614902..961bd103a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -269,8 +269,8 @@ do_triangle_ccw(struct setup_context *setup, struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; - float c1, c2, c3; int minx, maxx, miny, maxy; + float c1, c2, c3; tri->inputs.state = setup->fs.stored; @@ -328,9 +328,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From 269342d916fff3bf0fa0a5c1f26aec30b62ed352 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 11:29:05 +0100 Subject: llvmpipe: correctly scale top/left fill adjustments Was overdoing it previously. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 961bd103a7..89b2b4eb37 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -328,9 +328,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1 += 1.0/16.0f; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2 += 1.0/16.0f; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3 += 1.0/16.0f; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From f2be08ae0e20b3da8ff684ffeb94412cc6a5a5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 19 Oct 2009 11:53:22 +0100 Subject: llvmpipe: Allocate the blend color from the data store, and ensure it's aligned. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 34 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 28 ++++++++++++++++++-- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 08dac459db..da5a68cd40 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -105,6 +105,7 @@ static void reset_context( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); setup->constants.stored_size = 0; setup->constants.stored_data = NULL; + setup->dirty = ~0; /* Free all but last binner command lists: */ @@ -453,20 +454,14 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ) { - unsigned i, j; - SETUP_DEBUG("%s\n", __FUNCTION__); - if(!setup->fs.current.jit_context.blend_color) - setup->fs.current.jit_context.blend_color = align_malloc(4 * 16, 16); + assert(blend_color); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - setup->fs.current.jit_context.blend_color[i*4 + j] = c; + if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) { + memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color); + setup->dirty |= LP_SETUP_NEW_BLEND_COLOR; } - - setup->dirty |= LP_SETUP_NEW_FS; } void @@ -522,6 +517,25 @@ lp_setup_update_shader_state( struct setup_context *setup ) assert(setup->fs.current.jit_function); + if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { + uint8_t *stored; + unsigned i, j; + + stored = get_data_aligned(&setup->data, 4 * 16, 16); + + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*4 + j] = c; + } + + setup->blend_color.stored = stored; + + setup->fs.current.jit_context.blend_color = setup->blend_color.stored; + setup->dirty |= LP_SETUP_NEW_FS; + } 
+ + if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_buffer *buffer = setup->constants.current; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 82ec71f100..bcd3b9b7aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -43,8 +43,9 @@ #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) -#define LP_SETUP_NEW_FS 0x01 -#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_BLEND_COLOR 0x04 /* switch to a non-pointer value for this: @@ -124,6 +125,11 @@ struct setup_context { const void *stored_data; } constants; + struct { + struct pipe_blend_color current; + uint8_t *stored; + } blend_color; + unsigned dirty; void (*point)( struct setup_context *, @@ -163,6 +169,24 @@ static INLINE void *get_data( struct data_block_list *list, } } +static INLINE void *get_data_aligned( struct data_block_list *list, + unsigned size, + unsigned alignment ) +{ + + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_setup_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + /* Add a command to a given bin. */ static INLINE void bin_command( struct cmd_block_list *list, -- cgit v1.2.3 From 301c1494b27ad92ff1237909f9c98c1660be8fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 19 Oct 2009 13:14:33 +0100 Subject: llvmpipe: Reset the pointer to stored jit context. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index da5a68cd40..c0f516e12c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,10 +101,10 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); - /* Reset derived data */ - pipe_buffer_reference(&setup->constants.current, NULL); + /* Reset derived state */ setup->constants.stored_size = 0; setup->constants.stored_data = NULL; + setup->fs.stored = NULL; setup->dirty = ~0; /* Free all but last binner command lists: @@ -634,6 +634,8 @@ lp_setup_destroy( struct setup_context *setup ) reset_context( setup ); + pipe_buffer_reference(&setup->constants.current, NULL); + for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) FREE(setup->tile[i][j].head); @@ -671,6 +673,8 @@ lp_setup_create( struct pipe_screen *screen ) setup->line = first_line; setup->point = first_point; + setup->dirty = ~0; + return setup; fail: -- cgit v1.2.3 From 0580079864c41c236a4167a1543b1a2fc5090362 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 12:24:18 +0100 Subject: llvmpipe: fixed-point rasterization --- src/gallium/drivers/llvmpipe/lp_rast.h | 35 +++--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 88 +++++++------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 ++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 145 ++++++++++++------------ 4 files changed, 147 insertions(+), 130 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index f371b709df..9725007119 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -39,7 +39,10 @@ struct lp_rasterizer; struct pipe_screen; -#define TILESIZE 64 +#define FIXED_ORDER 4 +#define FIXED_ONE (1<<FIXED_ORDER) [... remainder of the lp_rast.h hunk and the header/opening lines of the lp_rast_tri.c diff were lost in extraction ...] - float xstep1 = -tri->dy12; - float xstep2 = -tri->dy23; - float
xstep3 = -tri->dy31; + const int xstep1 = -tri->dy12 * FIXED_ONE; + const int xstep2 = -tri->dy23 * FIXED_ONE; + const int xstep3 = -tri->dy31 * FIXED_ONE; - float ystep1 = tri->dx12; - float ystep2 = tri->dx23; - float ystep3 = tri->dx31; + const int ystep1 = tri->dx12 * FIXED_ONE; + const int ystep2 = tri->dx23 * FIXED_ONE; + const int ystep3 = tri->dx31 * FIXED_ONE; unsigned mask = 0; @@ -108,26 +108,26 @@ static void do_block( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y, - float c1, - float c2, - float c3 ) + int c1, + int c2, + int c3 ) { - const int step = 2; + const int step = 2 * FIXED_ONE; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + const int xstep1 = -step * tri->dy12; + const int xstep2 = -step * tri->dy23; + const int xstep3 = -step * tri->dy31; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + const int ystep1 = step * tri->dx12; + const int ystep2 = step * tri->dx23; + const int ystep3 = step * tri->dx31; int ix, iy; for (iy = 0; iy < BLOCKSIZE; iy += 2) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; unsigned masks[4] = {0, 0, 0, 0}; @@ -160,23 +160,23 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE; + const int step = BLOCKSIZE * FIXED_ONE; - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; + int ei1 = tri->ei1 * step; + int ei2 = tri->ei2 * step; + int ei3 = tri->ei3 * step; - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; + int eo1 = tri->eo1 * step; + int eo2 = tri->eo2 * step; + int eo3 = tri->eo3 * step; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + int xstep1 = -step * tri->dy12; + int xstep2 = -step * 
tri->dy23; + int xstep3 = -step * tri->dy31; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + int ystep1 = step * tri->dx12; + int ystep2 = step * tri->dx23; + int ystep3 = step * tri->dx31; /* Clamp to tile dimensions: */ @@ -186,8 +186,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); int x, y; - float x0, y0; - float c1, c2, c3; + int x0, y0; + int c1, c2, c3; debug_printf("%s\n", __FUNCTION__); @@ -196,23 +196,23 @@ void lp_rast_triangle( struct lp_rasterizer *rast, return; } - minx &= ~(step-1); - miny &= ~(step-1); + minx &= ~(BLOCKSIZE-1); + miny &= ~(BLOCKSIZE-1); - x0 = (float)minx; - y0 = (float)miny; + x0 = minx << FIXED_ORDER; + y0 = miny << FIXED_ORDER; c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; - for (y = miny; y < maxy; y += step) + for (y = miny; y < maxy; y += BLOCKSIZE) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; - for (x = minx; x < maxx; x += step) + for (x = minx; x < maxx; x += BLOCKSIZE) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index bcd3b9b7aa..d91ffc7c20 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -169,6 +169,15 @@ static INLINE void *get_data( struct data_block_list *list, } } +/* Put back data if we decide not to use it, eg. culled triangles. 
+ */ +static INLINE void putback_data( struct data_block_list *list, + unsigned size) +{ + list->tail->used -= size; +} + + static INLINE void *get_data_aligned( struct data_block_list *list, unsigned size, unsigned alignment ) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 89b2b4eb37..44386a225d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -223,10 +223,9 @@ static void setup_tri_coefficients( struct setup_context *setup, /* XXX: do this by add/subtracting a large floating point number: */ -static inline float subpixel_snap( float a ) +static inline int subpixel_snap( float a ) { - int i = a * 16; - return (float)i * (1.0/16); + return util_iround(FIXED_ONE * a); } @@ -256,23 +255,18 @@ do_triangle_ccw(struct setup_context *setup, const float (*v3)[4], boolean frontfacing ) { - const int rt_width = setup->fb.width; - const int rt_height = setup->fb.height; - const float y1 = subpixel_snap(v1[0][1]); - const float y2 = subpixel_snap(v2[0][1]); - const float y3 = subpixel_snap(v3[0][1]); + const int y1 = subpixel_snap(v1[0][1]); + const int y2 = subpixel_snap(v2[0][1]); + const int y3 = subpixel_snap(v3[0][1]); - const float x1 = subpixel_snap(v1[0][0]); - const float x2 = subpixel_snap(v2[0][0]); - const float x3 = subpixel_snap(v3[0][0]); + const int x1 = subpixel_snap(v1[0][0]); + const int x2 = subpixel_snap(v2[0][0]); + const int x3 = subpixel_snap(v3[0][0]); struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; int minx, maxx, miny, maxy; - float c1, c2, c3; - - tri->inputs.state = setup->fs.stored; tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; @@ -285,35 +279,32 @@ do_triangle_ccw(struct setup_context *setup, area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); - /* Cull non-ccw and zero-sized triangles. + /* Cull non-ccw and zero-sized triangles. + * + * XXX: subject to overflow?? 
*/ - if (area <= 0 || util_is_inf_or_nan(area)) + if (area <= 0) { + putback_data( &setup->data, sizeof *tri ); return; + } // Bounding rectangle - minx = util_iround(MIN3(x1, x2, x3) - .5); - maxx = util_iround(MAX3(x1, x2, x3) + .5); - miny = util_iround(MIN3(y1, y2, y3) - .5); - maxy = util_iround(MAX3(y1, y2, y3) + .5); + tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER; - /* Clamp to framebuffer (or tile) dimensions: - */ - miny = MAX2(0, miny); - minx = MAX2(0, minx); - maxy = MIN2(rt_height, maxy); - maxx = MIN2(rt_width, maxx); - - if (miny == maxy || minx == maxx) + if (tri->miny == tri->maxy || + tri->minx == tri->maxx) { + putback_data( &setup->data, sizeof *tri ); return; + } - tri->miny = miny; - tri->minx = minx; - tri->maxy = maxy; - tri->maxx = maxx; + tri->inputs.state = setup->fs.stored; - /* The only divide in this code. Is it really needed? + /* */ - tri->oneoverarea = 1.0f / area; + tri->oneoverarea = ((float)FIXED_ONE) / (float)area; /* Setup parameter interpolants: */ @@ -328,9 +319,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1 += 1.0/16.0f; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2 += 1.0/16.0f; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3 += 1.0/16.0f; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. 
These will be scaled up at each recursive level to @@ -355,17 +346,10 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; - minx &= ~(TILESIZE-1); /* aligned blocks */ - miny &= ~(TILESIZE-1); /* aligned blocks */ - - c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; - c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; - c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - - minx /= TILESIZE; - miny /= TILESIZE; - maxx /= TILESIZE; - maxy /= TILESIZE; + minx = tri->minx / TILESIZE; + miny = tri->miny / TILESIZE; + maxx = tri->maxx / TILESIZE; + maxy = tri->maxy / TILESIZE; /* Convert to tile coordinates: */ @@ -378,23 +362,31 @@ do_triangle_ccw(struct setup_context *setup, } else { - const int step = TILESIZE; - - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; - - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + int c1 = (tri->c1 + + tri->dx12 * miny * TILESIZE * FIXED_ONE - + tri->dy12 * minx * TILESIZE * FIXED_ONE); + int c2 = (tri->c2 + + tri->dx23 * miny * TILESIZE * FIXED_ONE - + tri->dy23 * minx * TILESIZE * FIXED_ONE); + int c3 = (tri->c3 + + tri->dx31 * miny * TILESIZE * FIXED_ONE - + tri->dy31 * minx * TILESIZE * FIXED_ONE); + + int ei1 = tri->ei1 << (FIXED_ORDER + TILE_ORDER); + int ei2 = tri->ei2 << (FIXED_ORDER + TILE_ORDER); + int ei3 = tri->ei3 << (FIXED_ORDER + TILE_ORDER); + + int eo1 = tri->eo1 << (FIXED_ORDER + TILE_ORDER); + int eo2 = tri->eo2 << (FIXED_ORDER + TILE_ORDER); + int eo3 = tri->eo3 << (FIXED_ORDER + TILE_ORDER); + + int xstep1 = -(tri->dy12 << (FIXED_ORDER + TILE_ORDER)); + int xstep2 = -(tri->dy23 << (FIXED_ORDER + TILE_ORDER)); + int xstep3 = 
-(tri->dy31 << (FIXED_ORDER + TILE_ORDER)); + + int ystep1 = tri->dx12 << (FIXED_ORDER + TILE_ORDER); + int ystep2 = tri->dx23 << (FIXED_ORDER + TILE_ORDER); + int ystep3 = tri->dx31 << (FIXED_ORDER + TILE_ORDER); int x, y; @@ -406,12 +398,25 @@ do_triangle_ccw(struct setup_context *setup, */ for (y = miny; y <= maxy; y++) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; for (x = minx; x <= maxx; x++) { + assert(cx1 == + tri->c1 + + tri->dx12 * y * TILESIZE * FIXED_ONE - + tri->dy12 * x * TILESIZE * FIXED_ONE); + assert(cx2 == + tri->c2 + + tri->dx23 * y * TILESIZE * FIXED_ONE - + tri->dy23 * x * TILESIZE * FIXED_ONE); + assert(cx3 == + tri->c3 + + tri->dx31 * y * TILESIZE * FIXED_ONE - + tri->dy31 * x * TILESIZE * FIXED_ONE); + if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || cx3 + eo3 < 0) @@ -427,9 +432,9 @@ do_triangle_ccw(struct setup_context *setup, lp_rast_arg_inputs(&tri->inputs) ); } else - { + { /* shade partial tile */ - bin_command( &setup->tile[x][y], + bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From 2f5f357c5b67869e75087fc1f17ed0d666fb134e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 19 Oct 2009 14:02:01 +0100 Subject: llvmpipe: Reshape the shader input from 8x2 to 4x4. Incorrect rendering until the interpolation code generation is updated. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 19 ++++++++----------- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 4 ++-- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 6 ++++-- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index f9a8be20c7..c63aa22198 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -34,7 +34,7 @@ #include "lp_tile_soa.h" -#define BLOCKSIZE 8 +#define BLOCKSIZE 4 /* Convert 8x8 block into four runs of quads and render each in turn. @@ -55,11 +55,9 @@ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */ - int iy; + const unsigned masks[4] = {~0, ~0, ~0, ~0}; - for (iy = 0; iy < 4; iy += 2) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y, masks); } #endif @@ -124,30 +122,29 @@ do_block( struct lp_rasterizer *rast, int ix, iy; + unsigned masks[2][2] = {{0, 0}, {0, 0}}; + for (iy = 0; iy < BLOCKSIZE; iy += 2) { int cx1 = c1; int cx2 = c2; int cx3 = c3; - unsigned masks[4] = {0, 0, 0, 0}; - for (ix = 0; ix < BLOCKSIZE; ix += 2) { - masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); + masks[iy >> 1][ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); cx1 += xstep1; cx2 += xstep2; cx3 += xstep3; } - if(masks[0] || masks[1] || masks[2] || masks[3]) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); - c1 += ystep1; c2 += ystep2; c3 += ystep3; } + if(masks[0][0] || masks[0][1] || masks[1][0] || masks[1][1]) + lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865d..d72d6d2ef1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ 
b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -46,8 +46,8 @@ struct pipe_transfer; #define TILE_SIZE 64 -#define TILE_VECTOR_HEIGHT 2 -#define TILE_VECTOR_WIDTH 8 +#define TILE_VECTOR_HEIGHT 4 +#define TILE_VECTOR_WIDTH 4 extern const unsigned char tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 004c5c979e..a603b7f9f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -259,8 +259,10 @@ def main(): print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' - print ' { 0, 1, 4, 5, 8, 9, 12, 13},' - print ' { 2, 3, 6, 7, 10, 11, 14, 15}' + print ' { 0, 1, 4, 5},' + print ' { 2, 3, 6, 7},' + print ' { 8, 9, 12, 13},' + print ' { 10, 11, 14, 15}' print '};' print -- cgit v1.2.3 From 3fd6b724cc406573cf53684cd72fa7f60b65354a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 14:55:05 +0100 Subject: llvmpipe: pre-multiply some constants by fixed_one --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 26 ++++++-------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 56 ++++++++++++++++------------- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c63aa22198..17ebce4c85 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -66,13 +66,13 @@ do_quad( const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int xstep1 = -tri->dy12 * FIXED_ONE; - const int xstep2 = -tri->dy23 * FIXED_ONE; - const int xstep3 = -tri->dy31 * FIXED_ONE; + const int xstep1 = -tri->dy12 ; + const int xstep2 = -tri->dy23 ; + const int xstep3 = -tri->dy31 ; - const int ystep1 = tri->dx12 * FIXED_ONE; - const int ystep2 = tri->dx23 * FIXED_ONE; - const int ystep3 = tri->dx31 * FIXED_ONE; + const 
int ystep1 = tri->dx12 ; + const int ystep2 = tri->dx23 ; + const int ystep3 = tri->dx31 ; unsigned mask = 0; @@ -110,7 +110,7 @@ do_block( struct lp_rasterizer *rast, int c2, int c3 ) { - const int step = 2 * FIXED_ONE; + const int step = 2 ; const int xstep1 = -step * tri->dy12; const int xstep2 = -step * tri->dy23; @@ -157,7 +157,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE * FIXED_ONE; + const int step = BLOCKSIZE; int ei1 = tri->ei1 * step; int ei2 = tri->ei2 * step; @@ -183,7 +183,6 @@ void lp_rast_triangle( struct lp_rasterizer *rast, int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); int x, y; - int x0, y0; int c1, c2, c3; debug_printf("%s\n", __FUNCTION__); @@ -196,12 +195,9 @@ void lp_rast_triangle( struct lp_rasterizer *rast, minx &= ~(BLOCKSIZE-1); miny &= ~(BLOCKSIZE-1); - x0 = minx << FIXED_ORDER; - y0 = miny << FIXED_ORDER; - - c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; - c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; - c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; + c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; + c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; + c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; for (y = miny; y < maxy; y += BLOCKSIZE) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 44386a225d..6c9f75e90c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -323,6 +323,14 @@ do_triangle_ccw(struct setup_context *setup, if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; + tri->dy12 *= FIXED_ONE; + tri->dy23 *= FIXED_ONE; + tri->dy31 *= FIXED_ONE; + + tri->dx12 *= FIXED_ONE; + tri->dx23 *= FIXED_ONE; + tri->dx31 *= FIXED_ONE; + /* find trivial reject offsets for each edge for a single-pixel * sized block. 
These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if @@ -363,30 +371,30 @@ do_triangle_ccw(struct setup_context *setup, else { int c1 = (tri->c1 + - tri->dx12 * miny * TILESIZE * FIXED_ONE - - tri->dy12 * minx * TILESIZE * FIXED_ONE); + tri->dx12 * miny * TILESIZE - + tri->dy12 * minx * TILESIZE); int c2 = (tri->c2 + - tri->dx23 * miny * TILESIZE * FIXED_ONE - - tri->dy23 * minx * TILESIZE * FIXED_ONE); + tri->dx23 * miny * TILESIZE - + tri->dy23 * minx * TILESIZE); int c3 = (tri->c3 + - tri->dx31 * miny * TILESIZE * FIXED_ONE - - tri->dy31 * minx * TILESIZE * FIXED_ONE); + tri->dx31 * miny * TILESIZE - + tri->dy31 * minx * TILESIZE); - int ei1 = tri->ei1 << (FIXED_ORDER + TILE_ORDER); - int ei2 = tri->ei2 << (FIXED_ORDER + TILE_ORDER); - int ei3 = tri->ei3 << (FIXED_ORDER + TILE_ORDER); + int ei1 = tri->ei1 << TILE_ORDER; + int ei2 = tri->ei2 << TILE_ORDER; + int ei3 = tri->ei3 << TILE_ORDER; - int eo1 = tri->eo1 << (FIXED_ORDER + TILE_ORDER); - int eo2 = tri->eo2 << (FIXED_ORDER + TILE_ORDER); - int eo3 = tri->eo3 << (FIXED_ORDER + TILE_ORDER); + int eo1 = tri->eo1 << TILE_ORDER; + int eo2 = tri->eo2 << TILE_ORDER; + int eo3 = tri->eo3 << TILE_ORDER; - int xstep1 = -(tri->dy12 << (FIXED_ORDER + TILE_ORDER)); - int xstep2 = -(tri->dy23 << (FIXED_ORDER + TILE_ORDER)); - int xstep3 = -(tri->dy31 << (FIXED_ORDER + TILE_ORDER)); + int xstep1 = -(tri->dy12 << TILE_ORDER); + int xstep2 = -(tri->dy23 << TILE_ORDER); + int xstep3 = -(tri->dy31 << TILE_ORDER); - int ystep1 = tri->dx12 << (FIXED_ORDER + TILE_ORDER); - int ystep2 = tri->dx23 << (FIXED_ORDER + TILE_ORDER); - int ystep3 = tri->dx31 << (FIXED_ORDER + TILE_ORDER); + int ystep1 = tri->dx12 << TILE_ORDER; + int ystep2 = tri->dx23 << TILE_ORDER; + int ystep3 = tri->dx31 << TILE_ORDER; int x, y; @@ -406,16 +414,16 @@ do_triangle_ccw(struct setup_context *setup, { assert(cx1 == tri->c1 + - tri->dx12 * y * TILESIZE * FIXED_ONE - - tri->dy12 * x * TILESIZE * 
FIXED_ONE); + tri->dx12 * y * TILESIZE - + tri->dy12 * x * TILESIZE); assert(cx2 == tri->c2 + - tri->dx23 * y * TILESIZE * FIXED_ONE - - tri->dy23 * x * TILESIZE * FIXED_ONE); + tri->dx23 * y * TILESIZE - + tri->dy23 * x * TILESIZE); assert(cx3 == tri->c3 + - tri->dx31 * y * TILESIZE * FIXED_ONE - - tri->dy31 * x * TILESIZE * FIXED_ONE); + tri->dx31 * y * TILESIZE - + tri->dy31 * x * TILESIZE); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From b0828b0adc7438ef33f9393f839226ef7dfda0dc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 16:41:27 +0100 Subject: llvmpipe: calculate masks in format desired by shader Also remove branches calculating masks for quads. --- src/gallium/drivers/llvmpipe/lp_rast.c | 21 ++++---- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 82 ++++++++++++----------------- 3 files changed, 44 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2038403c8f..01f46dcab1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,7 +193,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned masks[4] = {~0, ~0, ~0, ~0}; + static const uint32_t ALIGN16_ATTRIB masks[4][4] = + { {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0} }; + unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -202,23 +207,20 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, masks); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const unsigned 
*masks) + const uint32_t *masks) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; unsigned ix, iy; /* Sanity checks */ @@ -228,11 +230,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, ix = x % TILE_SIZE; iy = y % TILE_SIZE; - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; - /* color buffer */ color = &TILE_PIXEL(tile->color, ix, iy, 0); @@ -254,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - &mask[0][0], + masks, color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 11e8e78e79..f438faaf36 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const unsigned *masks); + const uint32_t *masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 17ebce4c85..5f22aca668 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,34 +37,26 @@ #define BLOCKSIZE 4 -/* Convert 8x8 block into four runs of quads and render each in turn. 
+/* Render a 4x4 unmasked block: */ -#if (BLOCKSIZE == 8) static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - const unsigned masks[4] = {~0, ~0, ~0, ~0}; - int iy; + static const uint32_t ALIGN16_ATTRIB masks[4][4] = + { {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0} }; - for (iy = 0; iy < 8; iy += 2) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); } -#else -static void block_full( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y ) -{ - const unsigned masks[4] = {~0, ~0, ~0, ~0}; - lp_rast_shade_quads(rast, &tri->inputs, x, y, masks); -} -#endif -static INLINE unsigned +static INLINE void do_quad( const struct lp_rast_triangle *tri, - int x, int y, - int c1, int c2, int c3 ) + int c1, int c2, int c3, + int32_t *mask ) { const int xstep1 = -tri->dy12 ; const int xstep2 = -tri->dy23 ; @@ -73,30 +65,22 @@ do_quad( const struct lp_rast_triangle *tri, const int ystep1 = tri->dx12 ; const int ystep2 = tri->dx23 ; const int ystep3 = tri->dx31 ; - - unsigned mask = 0; - - if (c1 > 0 && - c2 > 0 && - c3 > 0) - mask |= 1; - - if (c1 + xstep1 > 0 && - c2 + xstep2 > 0 && - c3 + xstep3 > 0) - mask |= 2; - - if (c1 + ystep1 > 0 && - c2 + ystep2 > 0 && - c3 + ystep3 > 0) - mask |= 4; - - if (c1 + ystep1 + xstep1 > 0 && - c2 + ystep2 + xstep2 > 0 && - c3 + ystep3 + xstep3 > 0) - mask |= 8; - - return mask; + + mask[0] = ~(((c1) | + (c2) | + (c3)) >> 31); + + mask[1] = ~(((c1 + xstep1) | + (c2 + xstep2) | + (c3 + xstep3)) >> 31); + + mask[2] = ~(((c1 + ystep1) | + (c2 + ystep2) | + (c3 + ystep3)) >> 31); + + mask[3] = ~(((c1 + ystep1 + xstep1) | + (c2 + ystep2 + xstep2) | + (c3 + ystep3 + xstep3)) >> 31); } /* Evaluate each pixel in a block, generate a mask and possibly render @@ -121,17 +105,17 @@ do_block( struct lp_rasterizer *rast, const int ystep3 = step * tri->dx31; int ix, iy; + 
uint32_t ALIGN16_ATTRIB mask[4][4]; - unsigned masks[2][2] = {{0, 0}, {0, 0}}; - for (iy = 0; iy < BLOCKSIZE; iy += 2) { + for (iy = 0; iy < 4; iy += 2) { int cx1 = c1; int cx2 = c2; int cx3 = c3; - for (ix = 0; ix < BLOCKSIZE; ix += 2) { + for (ix = 0; ix < 2; ix ++) { - masks[iy >> 1][ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); + do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); cx1 += xstep1; cx2 += xstep2; @@ -143,8 +127,10 @@ do_block( struct lp_rasterizer *rast, c3 += ystep3; } - if(masks[0][0] || masks[0][1] || masks[1][0] || masks[1][1]) - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + /* As we do trivial reject already, masks should rarely be all + * zero: + */ + lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); } -- cgit v1.2.3 From 5b07d4de38b732f99237161d940f40e3ce6e29c3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 17:10:48 +0100 Subject: llvmpipe: remove a leftover 8x2 usage --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 01f46dcab1..85b756e453 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -203,10 +203,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, RAST_DEBUG("%s\n", __FUNCTION__); - /* Use the existing preference for 8x2 (four quads) shading: + /* Use the existing preference for 4x4 (four quads) shading: */ - for (y = 0; y < TILE_SIZE; y += 2) - for (x = 0; x < TILE_SIZE; x += 8) + for (y = 0; y < TILE_SIZE; y += 4) + for (x = 0; x < TILE_SIZE; x += 4) lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); } @@ -239,7 +239,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, depth = tile->depth + iy*TILE_SIZE + 2*ix; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 16)); + 
assert(lp_check_alignment(masks, 16)); assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); -- cgit v1.2.3 From 7670628061c2a6ce0a1a787556b0e33a38fd3049 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 02:46:00 +0100 Subject: llvmpipe: precalculate some offsets --- src/gallium/drivers/llvmpipe/lp_rast.c | 20 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 2 + src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 80 +++++------------------------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 ++++++++++ 5 files changed, 51 insertions(+), 79 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 85b756e453..39fb8cdb6b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; - + const unsigned mask = ~0; unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks) + unsigned mask) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - unsigned ix, iy; + uint32_t ALIGN16_ATTRIB masks[16]; + unsigned ix, iy, i; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % 
TILE_VECTOR_HEIGHT == 0); + /* mask */ + for (i = 0; i < 16; ++i) + masks[i] = mask & (1 << i) ? ~0 : 0; + ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - masks, + &masks[0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 9725007119..318bf73b15 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -111,6 +111,8 @@ struct lp_rast_triangle { int c2; int c3; + int step[3][16]; + /* XXX: this is only used inside lp_setup_tri.c, don't really * need it here: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f438faaf36..2333729807 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks); + unsigned masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5f22aca668..b5a3753a88 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,100 +37,44 @@ #define BLOCKSIZE 4 + /* Render a 4x4 unmasked block: */ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; + unsigned mask = ~0; - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); } -static INLINE void -do_quad( const struct lp_rast_triangle *tri, - int c1, int c2, int c3, - int32_t *mask ) -{ - const int xstep1 = -tri->dy12 ; - const int 
xstep2 = -tri->dy23 ; - const int xstep3 = -tri->dy31 ; - - const int ystep1 = tri->dx12 ; - const int ystep2 = tri->dx23 ; - const int ystep3 = tri->dx31 ; - - mask[0] = ~(((c1) | - (c2) | - (c3)) >> 31); - - mask[1] = ~(((c1 + xstep1) | - (c2 + xstep2) | - (c3 + xstep3)) >> 31); - - mask[2] = ~(((c1 + ystep1) | - (c2 + ystep2) | - (c3 + ystep3)) >> 31); - - mask[3] = ~(((c1 + ystep1 + xstep1) | - (c2 + ystep2 + xstep2) | - (c3 + ystep3 + xstep3)) >> 31); -} /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, + const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int step = 2 ; - - const int xstep1 = -step * tri->dy12; - const int xstep2 = -step * tri->dy23; - const int xstep3 = -step * tri->dy31; - - const int ystep1 = step * tri->dx12; - const int ystep2 = step * tri->dx23; - const int ystep3 = step * tri->dx31; + int i; + unsigned mask = 0; - int ix, iy; - uint32_t ALIGN16_ATTRIB mask[4][4]; - - - for (iy = 0; iy < 4; iy += 2) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 2; ix ++) { - - do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } + for (i = 0; i < 16; i++) + mask |= (~(((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31)) & (1 << i); + /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6c9f75e90c..a5a0407a57 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -359,6 +359,32 @@ do_triangle_ccw(struct setup_context *setup, maxx = 
tri->maxx / TILESIZE; maxy = tri->maxy / TILESIZE; + { + int xstep1 = -tri->dy12; + int xstep2 = -tri->dy23; + int xstep3 = -tri->dy31; + + int ystep1 = tri->dx12; + int ystep2 = tri->dx23; + int ystep3 = tri->dx31; + + int ix, iy; + int qx, qy; + int i = 0; + + for (qy = 0; qy < 4; qy += 2) { + for (qx = 0; qx < 4; qx += 2) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); + tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); + tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); + } + } + } + } + } + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) -- cgit v1.2.3 From 7b116e13a2aa28a699e30c907c1b1ae5e04cab28 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 03:17:17 +0100 Subject: llvmpipe: pass mask as a linear encoding of the 4x4 block --- src/gallium/drivers/llvmpipe/lp_rast.c | 41 ++++++++++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 15 ++++------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 39fb8cdb6b..6fd6acc0fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -216,16 +216,45 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB masks[16]; - unsigned ix, iy, i; + uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; + unsigned ix, iy; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - /* mask */ - for (i = 0; i < 16; ++i) - masks[i] = mask & (1 << i) ? ~0 : 0; + /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but + * the pixel shader wants to swizzle them into 4 2x2 quads. + * + * Additionally, the pixel shader wants masks as full dword ~0, + * while the rasterizer wants to pack per-pixel bits tightly. 
+ */ +#if 0 + unsigned qx, qy; + for (qy = 0; qy < 2; ++qy) + for (qx = 0; qx < 2; ++qx) + for (iy = 0; iy < 2; ++iy) + for (ix = 0; ix < 2; ++ix) + masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0; +#else + masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0; + masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0; + masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0; + masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0; + masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0; + masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0; + masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0; + masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0; + + masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0; + masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0; + masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0; + masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0; + masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0; + masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0; + masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0; + masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? 
~0 : 0; +#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +280,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - &masks[0], + &masks[0][0][0][0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a5a0407a57..cf8643fc63 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -369,18 +369,13 @@ do_triangle_ccw(struct setup_context *setup, int ystep3 = tri->dx31; int ix, iy; - int qx, qy; int i = 0; - for (qy = 0; qy < 4; qy += 2) { - for (qx = 0; qx < 4; qx += 2) { - for (iy = 0; iy < 2; iy++) { - for (ix = 0; ix < 2; ix++, i++) { - tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); - tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); - tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); - } - } + for (iy = 0; iy < 4; iy++) { + for (ix = 0; ix < 4; ix++, i++) { + tri->step[0][i] = xstep1 * ix + ystep1 * iy; + tri->step[1][i] = xstep2 * ix + ystep2 * iy; + tri->step[2][i] = xstep3 * ix + ystep3 * iy; } } } -- cgit v1.2.3 From 1735325a23156b330c2281c91aec4a9b39ecbad9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 03:38:07 +0100 Subject: llvmpipe: recursive rasterization within a tile --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 174 ++++++++++++++++------------- 1 file changed, 98 insertions(+), 76 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index b5a3753a88..567e223168 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,7 +40,7 @@ /* Render a 4x4 unmasked block: */ -static void block_full( struct lp_rasterizer *rast, +static void block_full_4( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { @@ -50,17 +50,30 @@ static void block_full( struct lp_rasterizer *rast, } +static 
void block_full_16( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) +{ + unsigned mask = ~0; + unsigned ix, iy; + + for (iy = 0; iy < 16; iy+=4) + for (ix = 0; ix < 16; ix+=4) + lp_rast_shade_quads(rast, &tri->inputs, x + ix, y + iy , mask); +} + + /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void -do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_4( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) { int i; unsigned mask = 0; @@ -74,10 +87,54 @@ do_block( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); + if (mask) + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } +static void +do_block_16( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) +{ + int ix,iy,i = 0; + + int ei1 = tri->ei1 << 2; + int ei2 = tri->ei2 << 2; + int ei3 = tri->ei3 << 2; + + int eo1 = tri->eo1 << 2; + int eo2 = tri->eo2 << 2; + int eo3 = tri->eo3 << 2; + for (iy = 0; iy < 16; iy+=4) + { + for (ix = 0; ix < 16; ix+=4, i++) + { + int cx1 = c1 + (tri->step[0][i] << 2); + int cx2 = c2 + (tri->step[1][i] << 2); + int cx3 = c3 + (tri->step[2][i] << 2); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full_4(rast, tri, x+ix, y+iy); /* trivial accept */ + } + else + { + do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + } + } + } +} /* Scan the tile in chunks and figure out which pixels to rasterize * for this triangle: @@ -87,84 +144,49 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE; + int x = rast->x; + int y = 
rast->y; + int ix,iy,i = 0; - int ei1 = tri->ei1 * step; - int ei2 = tri->ei2 * step; - int ei3 = tri->ei3 * step; + int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; + int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; + int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; - int eo1 = tri->eo1 * step; - int eo2 = tri->eo2 * step; - int eo3 = tri->eo3 * step; + int ei1 = tri->ei1 << 4; + int ei2 = tri->ei2 << 4; + int ei3 = tri->ei3 << 4; - int xstep1 = -step * tri->dy12; - int xstep2 = -step * tri->dy23; - int xstep3 = -step * tri->dy31; - - int ystep1 = step * tri->dx12; - int ystep2 = step * tri->dx23; - int ystep3 = step * tri->dx31; - - /* Clamp to tile dimensions: - */ - int minx = MAX2(tri->minx, rast->x); - int miny = MAX2(tri->miny, rast->y); - int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); - int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); - - int x, y; - int c1, c2, c3; + int eo1 = tri->eo1 << 4; + int eo2 = tri->eo2 << 4; + int eo3 = tri->eo3 << 4; debug_printf("%s\n", __FUNCTION__); - if (miny == maxy || minx == maxx) { - debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); - return; - } - - minx &= ~(BLOCKSIZE-1); - miny &= ~(BLOCKSIZE-1); - c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; - c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; - c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - - for (y = miny; y < maxy; y += BLOCKSIZE) + for (iy = 0; iy < 64; iy+=16) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (x = minx; x < maxx; x += BLOCKSIZE) + for (ix = 0; ix < 64; ix+=16, i++) { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full(rast, tri, x, y); /* trivial accept */ - } - else - { - do_block(rast, tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; + int cx1 = c1 + (tri->step[0][i] << 4); + int cx2 = c2 + (tri->step[1][i] << 4); + int 
cx3 = c3 + (tri->step[2][i] << 4); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full_16(rast, tri, x+ix, y+iy); /* trivial accept */ + } + else + { + do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + } } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; } } -- cgit v1.2.3 From 8d752a20c6f70b442ac2210cce0fd001499be5f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 08:56:58 +0100 Subject: llvmpipe: build list of 4x4 blocks to be shaded --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 567e223168..12ac840ef2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -36,30 +36,33 @@ #define BLOCKSIZE 4 - +static struct { + int x; + int y; + unsigned mask; +} blocks[256]; +static int nr_blocks; /* Render a 4x4 unmasked block: */ static void block_full_4( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, int x, int y ) { - unsigned mask = ~0; - - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); + blocks[nr_blocks].x = x; + blocks[nr_blocks].y = y; + blocks[nr_blocks].mask = ~0; + nr_blocks++; } static void block_full_16( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, int x, int y ) { - unsigned mask = ~0; unsigned ix, iy; for (iy = 0; iy < 16; iy+=4) for (ix = 0; ix < 16; ix+=4) - lp_rast_shade_quads(rast, &tri->inputs, x + ix, y + iy , mask); + block_full_4(rast, x + ix, y + iy); } @@ -87,8 +90,12 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all * zero: */ - if (mask) - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); + if (mask) { + blocks[nr_blocks].x = x; + 
blocks[nr_blocks].y = y; + blocks[nr_blocks].mask = mask; + nr_blocks++; + } } static void @@ -126,7 +133,7 @@ do_block_16( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - block_full_4(rast, tri, x+ix, y+iy); /* trivial accept */ + block_full_4(rast, x+ix, y+iy); /* trivial accept */ } else { @@ -162,6 +169,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, debug_printf("%s\n", __FUNCTION__); + nr_blocks = 0; for (iy = 0; iy < 64; iy+=16) { @@ -180,7 +188,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - block_full_16(rast, tri, x+ix, y+iy); /* trivial accept */ + block_full_16(rast, x+ix, y+iy); /* trivial accept */ } else { @@ -188,5 +196,11 @@ void lp_rast_triangle( struct lp_rasterizer *rast, } } } + + for (i = 0; i < nr_blocks; i++) + lp_rast_shade_quads(rast, &tri->inputs, + blocks[i].x, + blocks[i].y, + blocks[i].mask); } -- cgit v1.2.3 From 3199c6e764c20c69a76c561b9f4b89a23e5a97f5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 09:44:23 +0100 Subject: llvmpipe: move block list into rast struct --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 7 ++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 35 +++++++++++++---------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 2333729807..323c046cf4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -74,6 +74,13 @@ struct lp_rasterizer { unsigned clear_depth; char clear_stencil; } state; + + int nr_blocks; + struct { + unsigned x; + unsigned y; + unsigned mask; + } blocks[256]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 12ac840ef2..174e6ab19b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -36,22 +36,17 @@ #define BLOCKSIZE 4 -static 
struct { - int x; - int y; - unsigned mask; -} blocks[256]; -static int nr_blocks; /* Render a 4x4 unmasked block: */ static void block_full_4( struct lp_rasterizer *rast, int x, int y ) { - blocks[nr_blocks].x = x; - blocks[nr_blocks].y = y; - blocks[nr_blocks].mask = ~0; - nr_blocks++; + int i = rast->nr_blocks; + rast->blocks[i].x = x; + rast->blocks[i].y = y; + rast->blocks[i].mask = ~0; + rast->nr_blocks++; } @@ -86,15 +81,15 @@ do_block_4( struct lp_rasterizer *rast, (c2 + tri->step[1][i]) | (c3 + tri->step[2][i])) >> 31)) & (1 << i); - /* As we do trivial reject already, masks should rarely be all * zero: */ if (mask) { - blocks[nr_blocks].x = x; - blocks[nr_blocks].y = y; - blocks[nr_blocks].mask = mask; - nr_blocks++; + int i = rast->nr_blocks; + rast->blocks[i].x = x; + rast->blocks[i].y = y; + rast->blocks[i].mask = mask; + rast->nr_blocks++; } } @@ -169,7 +164,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, debug_printf("%s\n", __FUNCTION__); - nr_blocks = 0; + rast->nr_blocks = 0; for (iy = 0; iy < 64; iy+=16) { @@ -197,10 +192,10 @@ void lp_rast_triangle( struct lp_rasterizer *rast, } } - for (i = 0; i < nr_blocks; i++) + for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, - blocks[i].x, - blocks[i].y, - blocks[i].mask); + rast->blocks[i].x, + rast->blocks[i].y, + rast->blocks[i].mask); } -- cgit v1.2.3 From cccb1842092bd1b9f35aee0ac21d580c0365e4a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 10:11:52 +0100 Subject: llvmpipe: minor opts to setup_tri --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 66 +++++++++++++---------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index cf8643fc63..89d75710dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -221,8 +221,6 @@ static void setup_tri_coefficients( struct setup_context 
*setup, -/* XXX: do this by add/subtracting a large floating point number: - */ static inline int subpixel_snap( float a ) { return util_iround(FIXED_ONE * a); @@ -235,15 +233,6 @@ static INLINE void bin_triangle( struct cmd_block_list *list, } -/* to avoid having to allocate power-of-four, square render targets, - * end up having a specialized version of the above that runs only at - * the topmost level. - * - * at the topmost level there may be an arbitary number of steps on - * either dimension, so this loop needs to be either separately - * code-generated and unrolled for each render target size, or kept as - * generic looping code: - */ #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) @@ -354,11 +343,6 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; - minx = tri->minx / TILESIZE; - miny = tri->miny / TILESIZE; - maxx = tri->maxx / TILESIZE; - maxy = tri->maxy / TILESIZE; - { int xstep1 = -tri->dy12; int xstep2 = -tri->dy23; @@ -370,16 +354,37 @@ do_triangle_ccw(struct setup_context *setup, int ix, iy; int i = 0; + + int c1 = 0; + int c2 = 0; + int c3 = 0; for (iy = 0; iy < 4; iy++) { + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; + for (ix = 0; ix < 4; ix++, i++) { - tri->step[0][i] = xstep1 * ix + ystep1 * iy; - tri->step[1][i] = xstep2 * ix + ystep2 * iy; - tri->step[2][i] = xstep3 * ix + ystep3 * iy; + tri->step[0][i] = cx1; + tri->step[1][i] = cx2; + tri->step[2][i] = cx3; + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; } } + minx = tri->minx / TILESIZE; + miny = tri->miny / TILESIZE; + maxx = tri->maxx / TILESIZE; + maxy = tri->maxy / TILESIZE; + + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) @@ -419,10 +424,7 @@ do_triangle_ccw(struct setup_context *setup, int x, y; - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in 
dimension. - * - * Trivially accept or reject blocks, else jump to per-pixel + /* Trivially accept or reject blocks, else jump to per-pixel * examination above. */ for (y = miny; y <= maxy; y++) @@ -430,38 +432,30 @@ do_triangle_ccw(struct setup_context *setup, int cx1 = c1; int cx2 = c2; int cx3 = c3; + int in = 0; for (x = minx; x <= maxx; x++) { - assert(cx1 == - tri->c1 + - tri->dx12 * y * TILESIZE - - tri->dy12 * x * TILESIZE); - assert(cx2 == - tri->c2 + - tri->dx23 * y * TILESIZE - - tri->dy23 * x * TILESIZE); - assert(cx3 == - tri->c3 + - tri->dx31 * y * TILESIZE - - tri->dy31 * x * TILESIZE); - if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* do nothing */ + if (in) + break; } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { + in = 1; /* shade whole tile */ bin_command( &setup->tile[x][y], lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } else { + in = 1; /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, -- cgit v1.2.3 From 341edde1d2e9f9f989d41869cc436b51942941e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 Oct 2009 14:35:54 +0100 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 89d75710dd..041716adc9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -227,13 +227,6 @@ static inline int subpixel_snap( float a ) } -static INLINE void bin_triangle( struct cmd_block_list *list, - const struct lp_rast_triangle arg ) -{ -} - - - #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) -- cgit v1.2.3 From 694f05ac18c54253910678709f2dd35c36f1e912 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 Oct 2009 15:21:11 +0100 Subject: llvmpipe: remove one of two definitions of TILESIZE --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 
++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 2 -- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +++-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++++++---------- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 3 ++- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6fd6acc0fa..6e94e22e5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -71,8 +71,8 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - rast->check_for_clipped_tiles = (width % TILESIZE != 0 || - height % TILESIZE != 0); + rast->check_for_clipped_tiles = (width % TILE_SIZE != 0 || + height % TILE_SIZE != 0); if (cbuf) { rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, @@ -311,8 +311,8 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; const unsigned y = rast->y; - unsigned w = TILESIZE; - unsigned h = TILESIZE; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; if (x + w > rast->width) w -= x + w - rast->width; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 318bf73b15..282b9a46d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -41,8 +41,6 @@ struct pipe_screen; #define FIXED_ORDER 4 #define FIXED_ONE (1<tiles_y; j++) { lp_rast_start_tile( rast, - i * TILESIZE, - j * TILESIZE ); + i * TILE_SIZE, + j * TILE_SIZE ); for (block = setup->tile[i][j].head; block; block = block->next) { for (k = 0; k < block->count; k++) { @@ -241,8 +241,8 @@ begin_binning( struct setup_context *setup ) setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILESIZE) / TILESIZE; - setup->tiles_y = align(setup->fb.height, TILESIZE) / TILESIZE; + 
setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index d91ffc7c20..938f6ce262 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -30,14 +30,15 @@ #include "lp_setup.h" #include "lp_rast.h" +#include "lp_tile_soa.h" /* for TILE_SIZE */ /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. */ #define MAXHEIGHT 2048 #define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILESIZE) -#define TILES_Y (MAXHEIGHT / TILESIZE) +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 041716adc9..f2665c11df 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -372,10 +372,10 @@ do_triangle_ccw(struct setup_context *setup, } } - minx = tri->minx / TILESIZE; - miny = tri->miny / TILESIZE; - maxx = tri->maxx / TILESIZE; - maxy = tri->maxy / TILESIZE; + minx = tri->minx / TILE_SIZE; + miny = tri->miny / TILE_SIZE; + maxx = tri->maxx / TILE_SIZE; + maxy = tri->maxy / TILE_SIZE; /* Convert to tile coordinates: @@ -390,14 +390,14 @@ do_triangle_ccw(struct setup_context *setup, else { int c1 = (tri->c1 + - tri->dx12 * miny * TILESIZE - - tri->dy12 * minx * TILESIZE); + tri->dx12 * miny * TILE_SIZE - + tri->dy12 * minx * TILE_SIZE); int c2 = (tri->c2 + - tri->dx23 * miny * TILESIZE - - tri->dy23 * minx * TILESIZE); + tri->dx23 * miny * TILE_SIZE - + tri->dy23 * minx * TILE_SIZE); int c3 = (tri->c3 + - tri->dx31 * miny * 
TILESIZE - - tri->dy31 * minx * TILESIZE); + tri->dx31 * miny * TILE_SIZE - + tri->dy31 * minx * TILE_SIZE); int ei1 = tri->ei1 << TILE_ORDER; int ei2 = tri->ei2 << TILE_ORDER; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index d72d6d2ef1..0e874ce451 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -43,7 +43,8 @@ struct pipe_transfer; /** * Cache tile size (width and height). This needs to be a power of two. */ -#define TILE_SIZE 64 +#define TILE_ORDER 6 +#define TILE_SIZE (1< Date: Thu, 22 Oct 2009 17:21:37 +0100 Subject: llvmpipe: fix the worst of the depth regressions since switch to 4x4 --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6e94e22e5b..32cd5e09f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -265,7 +265,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = tile->depth + iy*TILE_SIZE + 2*ix; + depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(masks, 16)); -- cgit v1.2.3 From bfa1a766d6df39963daf54fbc63a84d9c139ec7e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 10:43:23 -0700 Subject: llvmpipe: update C_SOURCES --- src/gallium/drivers/llvmpipe/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 8f05e5a6fd..bfe34396d9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -33,7 +33,10 @@ C_SOURCES = \ lp_flush.c \ lp_jit.c \ lp_prim_vbuf.c \ + lp_rast.c \ + lp_rast_tri.c \ lp_setup.c \ + 
lp_setup_tri.c \ lp_query.c \ lp_screen.c \ lp_state_blend.c \ @@ -46,8 +49,6 @@ C_SOURCES = \ lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_soa.c -- cgit v1.2.3 From 7d042ac2a285c220a396d91a6dbe5c7f4e697c71 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 14:01:31 -0700 Subject: llvmpipe: minor refactoring of bin rasterization code --- src/gallium/drivers/llvmpipe/lp_setup.c | 44 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6d84147468..7091232350 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -170,13 +170,35 @@ static void bin_everywhere( struct setup_context *setup, } +/** Rasterize commands for a single bin */ +static void +rasterize_bin( struct lp_rasterizer *rast, + struct cmd_block_list *commands, + int x, int y) +{ + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); + } + } + + lp_rast_end_tile( rast ); +} + + +/** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { struct lp_rasterizer *rast = setup->rast; - struct cmd_block *block; - unsigned i,j,k; + unsigned i, j; SETUP_DEBUG("%s\n", __FUNCTION__); @@ -187,23 +209,13 @@ rasterize_bins( struct setup_context *setup, setup->fb.zsbuf != NULL && write_depth, setup->fb.width, setup->fb.height ); - - + /* loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - - lp_rast_start_tile( rast, - i * TILE_SIZE, - j * TILE_SIZE ); - - for (block = setup->tile[i][j].head; 
block; block = block->next) { - for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); - } - } - - lp_rast_end_tile( rast ); + rasterize_bin( rast, &setup->tile[i][j], + i * TILE_SIZE, + j * TILE_SIZE ); } } -- cgit v1.2.3 From 7505510c7b7c33f3c571647c0398da7e1b823806 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 14:02:01 -0700 Subject: llvmpipe: add a bunch of comments --- src/gallium/drivers/llvmpipe/lp_rast.c | 30 ++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_rast.h | 5 +++-- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 ++++++++++++--------- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 32cd5e09f5..09495f6288 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -51,6 +51,10 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) } +/** + * Begin the rasterization phase. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ boolean lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, @@ -95,6 +99,10 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, } +/** + * Finish the rasterization phase. + * Unmap framebuffer surfaces. + */ void lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; @@ -120,7 +128,10 @@ void lp_rast_end( struct lp_rasterizer *rast ) -/* Begining of each tile: +/** + * Begining rasterization of a tile. + * \param x window X position of the tile, in pixels + * \param y window Y position of the tile, in pixels */ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, @@ -132,6 +143,10 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, rast->y = y; } + +/** + * Clear the rasterizer's current color tile. 
+ */ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -157,6 +172,10 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, } } + +/** + * Clear the rasterizer's current z/stencil tile. + */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { @@ -307,6 +326,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ +/** + * Write the rasterizer's color tile to the framebuffer. + */ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; @@ -331,6 +353,9 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) } +/** + * Write the rasterizer's z/stencil tile to the framebuffer. + */ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { RAST_DEBUG("%s\n", __FUNCTION__); @@ -339,6 +364,9 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) } +/** + * Write the rasterizer's tiles to the framebuffer. + */ void lp_rast_end_tile( struct lp_rasterizer *rast ) { RAST_DEBUG("%s\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 282b9a46d1..a50b73b27f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -79,6 +79,7 @@ struct lp_rast_shader_inputs { * plus inputs to run the shader: */ struct lp_rast_triangle { + /* bounding box of tri (in pixels) */ int minx; int maxx; int miny; @@ -94,12 +95,12 @@ struct lp_rast_triangle { int eo2; int eo3; - /* y deltas for vertex pairs */ + /* y deltas for vertex pairs (in fixed pt) */ int dy12; int dy23; int dy31; - /* x deltas for vertex pairs */ + /* x deltas for vertex pairs (in fixed pt) */ int dx12; int dx23; int dx31; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 938f6ce262..3209e41c01 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ 
b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,7 @@ struct setup_context { struct cmd_block_list tile[TILES_X][TILES_Y]; struct data_block_list data; + /* size of framebuffer, in tiles */ unsigned tiles_x; unsigned tiles_y; @@ -154,6 +155,11 @@ void lp_setup_choose_point( struct setup_context *setup ); void lp_setup_new_data_block( struct data_block_list *list ); void lp_setup_new_cmd_block( struct cmd_block_list *list ); + +/** + * Allocate space for a command/data in the given block list. + * Grow the block list if needed. + */ static INLINE void *get_data( struct data_block_list *list, unsigned size) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index f2665c11df..cf86255406 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -230,6 +230,11 @@ static inline int subpixel_snap( float a ) #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) +/** + * Do basic setup for triangle rasterization and determine which + * framebuffer tiles are touched. Put the triangle in the bins for the + * tiles which we overlap. 
+ */ static void do_triangle_ccw(struct setup_context *setup, const float (*v1)[4], @@ -237,15 +242,14 @@ do_triangle_ccw(struct setup_context *setup, const float (*v3)[4], boolean frontfacing ) { - + /* x/y positions in fixed point */ + const int x1 = subpixel_snap(v1[0][0]); + const int x2 = subpixel_snap(v2[0][0]); + const int x3 = subpixel_snap(v3[0][0]); const int y1 = subpixel_snap(v1[0][1]); const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - const int x1 = subpixel_snap(v1[0][0]); - const int x2 = subpixel_snap(v2[0][0]); - const int x3 = subpixel_snap(v3[0][0]); - struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; int minx, maxx, miny, maxy; @@ -270,7 +274,7 @@ do_triangle_ccw(struct setup_context *setup, return; } - // Bounding rectangle + /* Bounding rectangle (in pixels) */ tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; @@ -372,13 +376,14 @@ do_triangle_ccw(struct setup_context *setup, } } + /* Convert to tile coordinates: + */ minx = tri->minx / TILE_SIZE; miny = tri->miny / TILE_SIZE; maxx = tri->maxx / TILE_SIZE; maxy = tri->maxy / TILE_SIZE; - - /* Convert to tile coordinates: + /* Determine which tile(s) intersect the triangle's bounding box */ if (miny == maxy && minx == maxx) { @@ -442,8 +447,9 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { in = 1; - /* shade whole tile */ - bin_command( &setup->tile[x][y], lp_rast_shade_tile, + /* triangle covers the whole tile- shade whole tile */ + bin_command( &setup->tile[x][y], + lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } else -- cgit v1.2.3 From 344a2a9abbe22236e8511d8166023bf81c85b03f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 15:31:30 -0700 Subject: progs/trivial: added two simple texture tests One draws a series of quads with different textures. 
The other draws with one texture which is changed with glTexSubImage2D(). --- progs/trivial/Makefile | 2 + progs/trivial/SConscript | 2 + progs/trivial/sub-tex.c | 137 ++++++++++++++++++++++++++++++++++++++++++++ progs/trivial/tex-quads.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+) create mode 100644 progs/trivial/sub-tex.c create mode 100644 progs/trivial/tex-quads.c diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile index 70728616d2..a78837611c 100644 --- a/progs/trivial/Makefile +++ b/progs/trivial/Makefile @@ -90,6 +90,8 @@ SOURCES = \ quadstrip-flat.c \ quadstrip.c \ readpixels.c \ + sub-tex.c \ + tex-quads.c \ tri-alpha.c \ tri-alpha-tex.c \ tri-array-interleaved.c \ diff --git a/progs/trivial/SConscript b/progs/trivial/SConscript index 9a1f3575bd..37a53293bf 100644 --- a/progs/trivial/SConscript +++ b/progs/trivial/SConscript @@ -77,6 +77,8 @@ progs = [ 'quadstrip-cont', 'quadstrip-flat', 'quadstrip', + 'sub-tex', + 'tex-quads', 'tri-alpha', 'tri-blend-color', 'tri-blend-max', diff --git a/progs/trivial/sub-tex.c b/progs/trivial/sub-tex.c new file mode 100644 index 0000000000..0b8bb28182 --- /dev/null +++ b/progs/trivial/sub-tex.c @@ -0,0 +1,137 @@ +/** + * Draw a series of textured quads after each quad, use glTexSubImage() + * to change one row of the texture image. 
+ */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <GL/glew.h> +#include <GL/glut.h> + + +static GLint Win = 0; +static GLuint Tex = 0; + + +static void Init(void) +{ + fprintf(stderr, "GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); + fflush(stderr); + + glGenTextures(1, &Tex); + glBindTexture(GL_TEXTURE_2D, Tex); + + glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +} + + +static void Reshape(int width, int height) +{ + float ar = (float) width / height; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(-ar, ar, -1.0, 1.0, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); +} + + +static void Key(unsigned char key, int x, int y) +{ + if (key == 27) { + glDeleteTextures(1, &Tex); + glutDestroyWindow(Win); + exit(1); + } + glutPostRedisplay(); +} + + +static void Draw(void) +{ + GLubyte tex[16][16][4]; + GLubyte row[16][4]; + int i, j; + + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) { + if ((i + j) & 1) { + tex[i][j][0] = 128; + tex[i][j][1] = 128; + tex[i][j][2] = 128; + tex[i][j][3] = 255; + } + else { + tex[i][j][0] = 255; + tex[i][j][1] = 255; + tex[i][j][2] = 255; + tex[i][j][3] = 255; + } + } + } + + for (i = 0; i < 16; i++) { + row[i][0] = 255; + row[i][1] = 0; + row[i][2] = 0; + row[i][3] = 255; + } + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 16, 16, 0, + GL_RGBA, GL_UNSIGNED_BYTE, tex); + glEnable(GL_TEXTURE_2D); + + glClear(GL_COLOR_BUFFER_BIT); + + for (i = 0; i < 9; i++) { + + glPushMatrix(); + glTranslatef(-4.0 + i, 0, 0); + glScalef(0.5, 0.5, 1.0); + + glBegin(GL_QUADS); + glTexCoord2f(1,0); + glVertex3f( 0.9, -0.9, 0.0); + glTexCoord2f(1,1); + glVertex3f( 0.9, 0.9, 0.0); + glTexCoord2f(0,1); + glVertex3f(-0.9, 0.9, 0.0); + glTexCoord2f(0,0); +
glVertex3f(-0.9, -0.9, 0.0); + glEnd(); + + glPopMatrix(); + + /* replace a row of the texture image with red texels */ + if (i * 2 < 16) + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*2, 16, 1, + GL_RGBA, GL_UNSIGNED_BYTE, row); + } + + + glutSwapBuffers(); +} + + +int main(int argc, char **argv) +{ + glutInit(&argc, argv); + glutInitWindowSize(900, 200); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); + Win = glutCreateWindow(*argv); + if (!Win) { + exit(1); + } + glewInit(); + Init(); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutDisplayFunc(Draw); + glutMainLoop(); + return 0; +} diff --git a/progs/trivial/tex-quads.c b/progs/trivial/tex-quads.c new file mode 100644 index 0000000000..626e178b87 --- /dev/null +++ b/progs/trivial/tex-quads.c @@ -0,0 +1,143 @@ +/** + * Draw a series of quads, each with a different texture. + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <GL/glew.h> +#include <GL/glut.h> + +#define NUM_TEX 10 + +static GLint Win = 0; +static GLuint Tex[NUM_TEX]; + + +static void Init(void) +{ + int i; + + fprintf(stderr, "GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); + fflush(stderr); + + glGenTextures(NUM_TEX, Tex); + + for (i = 0; i < NUM_TEX; i++) { + glBindTexture(GL_TEXTURE_2D, Tex[i]); + + glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } +} + + +static void Reshape(int width, int height) +{ + float ar = (float) width / height; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(-ar, ar, -1.0, 1.0, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); +} + + +static void Key(unsigned char key, int x, int y) +{ + if (key == 27) { + glDeleteTextures(NUM_TEX, Tex); + glutDestroyWindow(Win); + exit(1); + } + glutPostRedisplay(); +} + + +static void
Draw(void) +{ + GLubyte tex[16][16][4]; + int t, i, j; + + for (t = 0; t < NUM_TEX; t++) { + + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) { + if (i < t) { + /* red row */ + tex[i][j][0] = 255; + tex[i][j][1] = 0; + tex[i][j][2] = 0; + tex[i][j][3] = 255; + } + else if ((i + j) & 1) { + tex[i][j][0] = 128; + tex[i][j][1] = 128; + tex[i][j][2] = 128; + tex[i][j][3] = 255; + } + else { + tex[i][j][0] = 255; + tex[i][j][1] = 255; + tex[i][j][2] = 255; + tex[i][j][3] = 255; + } + } + } + + glBindTexture(GL_TEXTURE_2D, Tex[t]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 16, 16, 0, + GL_RGBA, GL_UNSIGNED_BYTE, tex); + } + + glEnable(GL_TEXTURE_2D); + + glClear(GL_COLOR_BUFFER_BIT); + + for (i = 0; i < NUM_TEX; i++) { + + glBindTexture(GL_TEXTURE_2D, Tex[i]); + + glPushMatrix(); + glTranslatef(-4.0 + i, 0, 0); + glScalef(0.5, 0.5, 1.0); + + glBegin(GL_QUADS); + glTexCoord2f(1,0); + glVertex3f( 0.9, -0.9, 0.0); + glTexCoord2f(1,1); + glVertex3f( 0.9, 0.9, 0.0); + glTexCoord2f(0,1); + glVertex3f(-0.9, 0.9, 0.0); + glTexCoord2f(0,0); + glVertex3f(-0.9, -0.9, 0.0); + glEnd(); + + glPopMatrix(); + } + + + glutSwapBuffers(); +} + + +int main(int argc, char **argv) +{ + glutInit(&argc, argv); + glutInitWindowSize(900, 200); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); + Win = glutCreateWindow(*argv); + if (!Win) { + exit(1); + } + glewInit(); + Init(); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutDisplayFunc(Draw); + glutMainLoop(); + return 0; +} -- cgit v1.2.3 From 63b1f23b3eb6fceaff7c2ceed925ef57f63f9fa2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 15:43:01 -0700 Subject: llvmpipe: comments, reformatting and assertions in tri rast code --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 117 +++++++++++++++++------------ 1 file changed, 70 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 174e6ab19b..428870bb04 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,12 +37,16 @@ #define BLOCKSIZE 4 -/* Render a 4x4 unmasked block: +/** + * Add a 4x4 block of pixels to the block list. + * All pixels are known to be inside the triangle's bounds. */ -static void block_full_4( struct lp_rasterizer *rast, - int x, int y ) +static void +block_full_4( struct lp_rasterizer *rast, int x, int y ) { int i = rast->nr_blocks; + assert(x % 4 == 0); + assert(y % 4 == 0); rast->blocks[i].x = x; rast->blocks[i].y = y; rast->blocks[i].mask = ~0; @@ -50,20 +54,26 @@ static void block_full_4( struct lp_rasterizer *rast, } -static void block_full_16( struct lp_rasterizer *rast, - int x, int y ) +/** + * Add a 16x16 block of pixels to the block list. + * All pixels are known to be inside the triangle's bounds. + */ +static void +block_full_16( struct lp_rasterizer *rast, int x, int y ) { unsigned ix, iy; - - for (iy = 0; iy < 16; iy+=4) - for (ix = 0; ix < 16; ix+=4) + assert(x % 16 == 0); + assert(y % 16 == 0); + for (iy = 0; iy < 16; iy += 4) + for (ix = 0; ix < 16; ix += 4) block_full_4(rast, x + ix, y + iy); } - -/* Evaluate each pixel in a block, generate a mask and possibly render - * the quad: +/** + * Evaluate each pixel in a 4x4 block to determine if it lies within + * the triangle's bounds. + * Generate a mask of in/out flags and add the block to the blocks list. 
*/ static void do_block_4( struct lp_rasterizer *rast, @@ -76,13 +86,15 @@ do_block_4( struct lp_rasterizer *rast, int i; unsigned mask = 0; + assert(x % 4 == 0); + assert(y % 4 == 0); + for (i = 0; i < 16; i++) mask |= (~(((c1 + tri->step[0][i]) | (c2 + tri->step[1][i]) | (c3 + tri->step[2][i])) >> 31)) & (1 << i); - /* As we do trivial reject already, masks should rarely be all - * zero: + /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { int i = rast->nr_blocks; @@ -93,15 +105,20 @@ do_block_4( struct lp_rasterizer *rast, } } + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. + */ static void do_block_16( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) { - int ix,iy,i = 0; + int ix, iy, i = 0; int ei1 = tri->ei1 << 2; int ei2 = tri->ei2 << 2; @@ -111,44 +128,48 @@ do_block_16( struct lp_rasterizer *rast, int eo2 = tri->eo2 << 2; int eo3 = tri->eo3 << 2; - for (iy = 0; iy < 16; iy+=4) - { - for (ix = 0; ix < 16; ix+=4, i++) - { + assert(x % 16 == 0); + assert(y % 16 == 0); + + for (iy = 0; iy < 16; iy+=4) { + for (ix = 0; ix < 16; ix+=4, i++) { int cx1 = c1 + (tri->step[0][i] << 2); int cx2 = c2 + (tri->step[1][i] << 2); int cx3 = c3 + (tri->step[2][i] << 2); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full_4(rast, x+ix, y+iy); /* trivial accept */ + cx3 + ei3 > 0) { + /* the block is completely inside the triangle */ + block_full_4(rast, x+ix, y+iy); } - else - { + else { + /* the block is partially in/out of the triangle */ do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); } } } } -/* Scan the tile in chunks and figure out which pixels to rasterize - 
* for this triangle: + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. */ -void lp_rast_triangle( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg arg ) +void +lp_rast_triangle( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg arg ) { const struct lp_rast_triangle *tri = arg.triangle; int x = rast->x; int y = rast->y; - int ix,iy,i = 0; + int ix, iy, i = 0; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; @@ -166,36 +187,38 @@ void lp_rast_triangle( struct lp_rasterizer *rast, rast->nr_blocks = 0; - for (iy = 0; iy < 64; iy+=16) - { - for (ix = 0; ix < 64; ix+=16, i++) - { + /* Walk over the tile to build a list of 4x4 pixel blocks which will + * be filled/shaded. We do this at two granularities: 16x16 blocks + * and then 4x4 blocks. + */ + for (iy = 0; iy < TILE_SIZE; iy += 16) { + for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { int cx1 = c1 + (tri->step[0][i] << 4); int cx2 = c2 + (tri->step[1][i] << 4); int cx3 = c3 + (tri->step[2][i] << 4); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full_16(rast, x+ix, y+iy); /* trivial accept */ + cx3 + ei3 > 0) { + /* the block is completely inside the triangle */ + block_full_16(rast, x+ix, y+iy); } - else - { + else { + /* the block is partially in/out of the triangle */ do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); } } } + /* Shade the 4x4 pixel blocks */ for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, rast->blocks[i].x, rast->blocks[i].y, rast->blocks[i].mask); } - -- cgit v1.2.3 From 938acf0367416c989d49f231da855ba6ea8d64ca Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:00:04 -0700 Subject: llvmpipe: make nr_blocks unsigned --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- 
src/gallium/drivers/llvmpipe/lp_rast_tri.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 323c046cf4..723bb5ad69 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -75,7 +75,7 @@ struct lp_rasterizer { char clear_stencil; } state; - int nr_blocks; + unsigned nr_blocks; struct { unsigned x; unsigned y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 428870bb04..07b0eccf1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -44,7 +44,7 @@ static void block_full_4( struct lp_rasterizer *rast, int x, int y ) { - int i = rast->nr_blocks; + const unsigned i = rast->nr_blocks; assert(x % 4 == 0); assert(y % 4 == 0); rast->blocks[i].x = x; @@ -97,7 +97,7 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { - int i = rast->nr_blocks; + const unsigned i = rast->nr_blocks; rast->blocks[i].x = x; rast->blocks[i].y = y; rast->blocks[i].mask = mask; @@ -169,7 +169,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, int x = rast->x; int y = rast->y; - int ix, iy, i = 0; + int ix, iy; + unsigned i = 0; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; -- cgit v1.2.3 From 6bde3e0fdfe4bfc3fee33ca0d1d8e13969eb8952 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 15:51:07 -0700 Subject: llvmpipe: replace shifts with multiplies to be clearer The compiler will still do the multiplies with shifts. It's just a bit easier to follow the logic with multiplies. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 07b0eccf1e..9543b86ecd 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -120,22 +120,22 @@ do_block_16( struct lp_rasterizer *rast, { int ix, iy, i = 0; - int ei1 = tri->ei1 << 2; - int ei2 = tri->ei2 << 2; - int ei3 = tri->ei3 << 2; + int ei1 = tri->ei1 * 4; + int ei2 = tri->ei2 * 4; + int ei3 = tri->ei3 * 4; - int eo1 = tri->eo1 << 2; - int eo2 = tri->eo2 << 2; - int eo3 = tri->eo3 << 2; + int eo1 = tri->eo1 * 4; + int eo2 = tri->eo2 * 4; + int eo3 = tri->eo3 * 4; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy+=4) { for (ix = 0; ix < 16; ix+=4, i++) { - int cx1 = c1 + (tri->step[0][i] << 2); - int cx2 = c2 + (tri->step[1][i] << 2); - int cx3 = c3 + (tri->step[2][i] << 2); + int cx1 = c1 + (tri->step[0][i] * 4); + int cx2 = c2 + (tri->step[1][i] * 4); + int cx3 = c3 + (tri->step[2][i] * 4); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || @@ -176,13 +176,13 @@ lp_rast_triangle( struct lp_rasterizer *rast, int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; - int ei1 = tri->ei1 << 4; - int ei2 = tri->ei2 << 4; - int ei3 = tri->ei3 << 4; + int ei1 = tri->ei1 * 16; + int ei2 = tri->ei2 * 16; + int ei3 = tri->ei3 * 16; - int eo1 = tri->eo1 << 4; - int eo2 = tri->eo2 << 4; - int eo3 = tri->eo3 << 4; + int eo1 = tri->eo1 * 16; + int eo2 = tri->eo2 * 16; + int eo3 = tri->eo3 * 16; debug_printf("%s\n", __FUNCTION__); @@ -194,9 +194,9 @@ lp_rast_triangle( struct lp_rasterizer *rast, */ for (iy = 0; iy < TILE_SIZE; iy += 16) { for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { - int cx1 = c1 + (tri->step[0][i] << 4); - int cx2 = c2 + (tri->step[1][i] << 4); - int cx3 = c3 + (tri->step[2][i] << 4); + int cx1 = c1 + 
(tri->step[0][i] * 16); + int cx2 = c2 + (tri->step[1][i] * 16); + int cx3 = c3 + (tri->step[2][i] * 16); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From 9c486774913f66c6496cd43cfd9dbd992c28d8cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:24:16 -0700 Subject: llvmpipe: simplify mask computation Make this a little easier to understand. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9543b86ecd..f6cb628ed4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,10 +89,12 @@ do_block_4( struct lp_rasterizer *rast, assert(x % 4 == 0); assert(y % 4 == 0); - for (i = 0; i < 16; i++) - mask |= (~(((c1 + tri->step[0][i]) | - (c2 + tri->step[1][i]) | - (c3 + tri->step[2][i])) >> 31)) & (1 << i); + for (i = 0; i < 16; i++) { + int any_negative = ((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31; + mask |= (~any_negative) & (1 << i); + } /* As we do trivial reject already, masks should rarely be all zero: */ -- cgit v1.2.3 From 63fe997e28b5bfee1f776a220d121987a5fee62e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:24:57 -0700 Subject: llvmpipe: added assertions And remove unused BLOCKSIZE. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index f6cb628ed4..e772a0158a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -34,9 +34,6 @@ #include "lp_tile_soa.h" -#define BLOCKSIZE 4 - - /** * Add a 4x4 block of pixels to the block list. * All pixels are known to be inside the triangle's bounds. 
@@ -186,6 +183,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; + assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); + debug_printf("%s\n", __FUNCTION__); rast->nr_blocks = 0; @@ -218,6 +217,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, } } + assert(rast->nr_blocks <= Elements(rast->blocks)); + /* Shade the 4x4 pixel blocks */ for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, -- cgit v1.2.3 From 5750a6426bc8d47f9801be5896b2d0f5ae3a5b12 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 14:55:48 -0700 Subject: llvmpipe: whitespace for readability --- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 0e874ce451..660cc30c82 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -44,7 +44,7 @@ struct pipe_transfer; * Cache tile size (width and height). This needs to be a power of two. */ #define TILE_ORDER 6 -#define TILE_SIZE (1< Date: Wed, 2 Dec 2009 15:13:45 -0700 Subject: llvmpipe: execute shaders on 4x4 blocks instead of 8x2 This matches the convention used by the recursive rasterizer. Also fixed assorted typos, comments, etc. Now tri-z.c, gears.c, etc look basically right but there's still some cracks in triangle rasterization. 
--- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 135 ++++++++++++++++++--------- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 10 +- src/gallium/drivers/llvmpipe/lp_rast.c | 22 +++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++- 4 files changed, 116 insertions(+), 63 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1..affeeca6ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -45,6 +45,36 @@ #include "lp_bld_interp.h" +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. + * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. + */ + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix } +/** + * Initialize the bld->a0, dadx, dady fields. This involves fetching + * those values from the arrays which are passed into the JIT function. 
+ */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, @@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, case TGSI_INTERPOLATE_CONSTANT: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".dady"); + attrib_name(a0, attrib, chan, ".a0"); break; default: @@ -135,29 +169,12 @@ coeff_multiply(struct lp_build_interp_soa_context *bld, /** - * Multiply the dadx and dady with the xstep and ystep respectively. + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. */ -static void -coeffs_update(struct lp_build_interp_soa_context *bld) -{ - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); - } - } - } - } -} - - static void attribs_init(struct lp_build_interp_soa_context *bld) { @@ -180,7 +197,9 @@ attribs_init(struct lp_build_interp_soa_context *bld) res = a0; if (mode != TGSI_INTERPOLATE_CONSTANT) { + /* res = res + x * dadx */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); + /* res = res + y * dady */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); } @@ -204,13 +223,19 @@ attribs_init(struct lp_build_interp_soa_context *bld) } +/** + * Increment the shader input attribute values. 
+ * This is called when we move from one quad to the next. + */ static void -attribs_update(struct lp_build_interp_soa_context *bld) +attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; + assert(quad_index < 4); + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { unsigned mask = bld->mask[attrib]; unsigned mode = bld->mode[attrib]; @@ -224,13 +249,21 @@ attribs_update(struct lp_build_interp_soa_context *bld) res = bld->attribs_pre[attrib][chan]; - if(bld->xstep) + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad */ + /* build res = res + dadx + dadx */ res = lp_build_add(&bld->base, res, dadx); + res = lp_build_add(&bld->base, res, dadx); + } - if(bld->ystep) + if (quad_index == 2 || quad_index == 3) { + /* bottom-left or bottom-right quad */ + /* build res = res + dady + dady */ res = lp_build_add(&bld->base, res, dady); + res = lp_build_add(&bld->base, res, dady); + } - bld->attribs_pre[attrib][chan] = res; + //XXX bld->attribs_pre[attrib][chan] = res; if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { LLVMValueRef w = bld->pos[3]; @@ -268,17 +301,32 @@ pos_init(struct lp_build_interp_soa_context *bld, } +/** + * Update quad position values when moving to the next quad. 
+ */ static void -pos_update(struct lp_build_interp_soa_context *bld) +pos_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef x = bld->attribs[0][0]; LLVMValueRef y = bld->attribs[0][1]; + const int xstep = 2, ystep = 2; - if(bld->xstep) - x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep)); + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad in block */ + /* build x += xstep */ + x = lp_build_add(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } - if(bld->ystep) - y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep)); + if (quad_index == 2) { + /* bottom-left quad in block */ + /* build y += ystep */ + y = lp_build_add(&bld->base, y, + lp_build_const_scalar(bld->base.type, ystep)); + /* build x -= xstep */ + x = lp_build_sub(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } lp_build_name(x, "pos.x"); lp_build_name(y, "pos.y"); @@ -288,6 +336,9 @@ pos_update(struct lp_build_interp_soa_context *bld) } +/** + * Initialize fragment shader input attribute info. + */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, @@ -297,9 +348,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep) + LLVMValueRef y0) { struct tgsi_parse_context parse; struct tgsi_full_declaration *decl; @@ -357,21 +406,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); attribs_init(bld); - - bld->xstep = xstep; - bld->ystep = ystep; - - coeffs_update(bld); } /** - * Advance the position and inputs with the xstep and ystep. + * Advance the position and inputs to the given quad within the block. 
*/ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld) +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index) { - pos_update(bld); + assert(quad_index < 4); + + pos_update(bld, quad_index); - attribs_update(bld); + attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9c57a10879..e2b3bc1bf0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -63,9 +63,6 @@ struct lp_build_interp_soa_context LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - int xstep; - int ystep; - /* Attribute values before perspective divide */ LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -88,12 +85,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep); + LLVMValueRef y0); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld); +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 09495f6288..f88dd4ae68 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -126,8 +126,6 @@ void lp_rast_end( struct lp_rasterizer *rast ) } - - /** * Begining rasterization of a tile. 
* \param x window X position of the tile, in pixels @@ -152,7 +150,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, + RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], clear_color[1], clear_color[2], @@ -181,7 +179,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -225,6 +223,9 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, } +/** + * Compute shading for a 4x4 block of pixels. + */ void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, @@ -237,6 +238,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; unsigned ix, iy; + int block_offset; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); @@ -275,16 +277,20 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? 
~0 : 0; #endif + assert((x % 2) == 0); + assert((y % 2) == 0); + ix = x % TILE_SIZE; iy = y % TILE_SIZE; + /* offset of the 4x4 pixel block (16 pixels) within the tile */ + block_offset = ((iy/4)*(16*16) + (ix/4)*16); + /* color buffer */ - color = &TILE_PIXEL(tile->color, ix, iy, 0); + color = tile->color + 4 * block_offset; /* depth buffer */ - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16; + depth = tile->depth + block_offset; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(masks, 16)); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0541d36580..aa9c006633 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -358,6 +358,9 @@ generate_blend(const struct pipe_blend_state *blend, /** * Generate the runtime callable function for the whole fragment pipeline. + * Note that the function which we generate operates on a block of 16 + * pixels at a time. The block contains 2x2 quads. Each quad contains + * 2x2 pixels. 
*/ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, @@ -437,8 +440,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ - fs_type.length = 4; /* 4 element per vector */ - num_fs = 4; + fs_type.length = 4; /* 4 elements per vector */ + num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -509,18 +512,19 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0, 2, 0); + x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); + /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) - lp_build_interp_soa_update(&interp); + lp_build_interp_soa_update(&interp, i); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); -- cgit v1.2.3 From e01fa1eaec34675d0b30127de4f78b020a092a83 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:39:40 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index db22a8028a..9470f834fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -41,6 +41,10 @@ #include "lp_bld_logic.h" +/** + * Build code to compare two values 'a' and 'b' using the given func. 
+ * \param func one of PIPE_FUNC_x + */ LLVMValueRef lp_build_cmp(struct lp_build_context *bld, unsigned func, @@ -56,6 +60,9 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef res; unsigned i; + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + if(func == PIPE_FUNC_NEVER) return zeros; if(func == PIPE_FUNC_ALWAYS) @@ -68,6 +75,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { + /* float[4] comparison */ LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -117,6 +125,7 @@ lp_build_cmp(struct lp_build_context *bld, return res; } else if(util_cpu_caps.has_sse2) { + /* int[4] comparison */ static const struct { unsigned swap:1; unsigned eq:1; -- cgit v1.2.3 From 69fe4281ea19e29d534c74d65789494f7be4d4e3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:40:49 -0700 Subject: llvmpipe: additional comment about float->uint conversion --- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 20c8710214..1df938529c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -124,6 +124,10 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ + /* YES: this fixes progs/trivial/tri-z-eq.c. + * Otherwise vertex Z=1.0 values get converted to something like + * 0xfffffb00 and the test for equality with 0xffffffff fails. 
+ */ #if 0 { LLVMValueRef msb; -- cgit v1.2.3 From 51663f0506ed2534e57b798cdfaf8a0d376eb7a2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:41:45 -0700 Subject: llvmpipe: quick & dirty implementation of lp_rast_store_zstencil() This allows us to do a glReadPixels(GL_DEPTH_COMPONENT) to see what's in the depth buffer to help debugging. --- src/gallium/drivers/llvmpipe/lp_rast.c | 53 ++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index f88dd4ae68..c8359f45a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -95,6 +95,23 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, return FALSE; } + if (zsbuf) { + rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, width, height); + if (!rast->zsbuf_transfer) + return FALSE; + + rast->zsbuf_map = screen->transfer_map(rast->screen, + rast->zsbuf_transfer); + if (!rast->zsbuf_map) + return FALSE; + } + return TRUE; } @@ -117,7 +134,7 @@ void lp_rast_end( struct lp_rasterizer *rast ) screen->tex_transfer_destroy(rast->cbuf_transfer); if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->cbuf_transfer); + screen->tex_transfer_destroy(rast->zsbuf_transfer); rast->cbuf_transfer = NULL; rast->zsbuf_transfer = NULL; @@ -359,14 +376,44 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) } +static void +lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < h; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < w; ++x) { + *dst_pixel++ = *src++; + } + dst_row += dst_stride; + } +} + /** * Write the rasterizer's z/stencil tile to the 
framebuffer. */ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + const unsigned x = rast->x; + const unsigned y = rast->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->width) + w -= x + w - rast->width; + + if (y + h > rast->height) + h -= y + h - rast->height; + + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - /* FIXME: call u_tile func to store depth/stencil to surface */ + assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_write_z32(rast->tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); } -- cgit v1.2.3 From 52081f0a2c5d1d54d9e0f323c681b30d7186d8e4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:43:19 -0700 Subject: progs/trivial: readback and print Z value in tri-z-eq.c --- progs/trivial/tri-z-eq.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/progs/trivial/tri-z-eq.c b/progs/trivial/tri-z-eq.c index b81c992f7d..195e8a26f6 100644 --- a/progs/trivial/tri-z-eq.c +++ b/progs/trivial/tri-z-eq.c @@ -69,6 +69,8 @@ static void Key(unsigned char key, int x, int y) static void Draw(void) { + float z = 1.0; + glClearColor(0.0, 0.0, 1.0, 0.0); glClearDepth(1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -77,15 +79,21 @@ static void Draw(void) glBegin(GL_TRIANGLES); glColor3f(0,0,.7); - glVertex3f( 0.9, -0.9, 1.0); + glVertex3f( 0.9, -0.9, z); glColor3f(.8,0,0); - glVertex3f( 0.9, 0.9, 1.0); + glVertex3f( 0.9, 0.9, z); glColor3f(0,.9,0); - glVertex3f(-0.9, 0.0, 1.0); + glVertex3f(-0.9, 0.0, z); glEnd(); glFlush(); + { + GLfloat z; + glReadPixels(125, 125, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &z); + printf("Z at (125, 125) = %f\n", z); + } + if (doubleBuffer) { glutSwapBuffers(); } -- cgit v1.2.3 From 51410a254c96779990995a2183eb742968df09e6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:13:22 -0700 Subject: llvmpipe: fix blend debug strings 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index aa9c006633..c0d5a70a55 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -416,11 +416,11 @@ generate_fragment(struct llvmpipe_context *lp, } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + debug_printf("blend.rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("blend.rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("blend.alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("blend.alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("blend.alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); } -- cgit v1.2.3 From 3094fc200920f9d5eb62136d3b25896229fb0dbf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:25:08 -0700 Subject: llvmpipe: more debug info --- src/gallium/drivers/llvmpipe/lp_setup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c index 7091232350..14b40dfe36 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -222,6 +222,8 @@ rasterize_bins( struct setup_context *setup, lp_rast_end( rast ); reset_context( setup ); + + SETUP_DEBUG("%s done \n", __FUNCTION__); } @@ -273,6 +275,8 @@ begin_binning( struct setup_context *setup ) else bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } + + SETUP_DEBUG("%s done\n", __FUNCTION__); } @@ -422,7 +426,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p %u\n", __FUNCTION__, (void *) input, nr); memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; @@ -432,7 +436,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) fs); /* FIXME: reference count */ setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; @@ -442,7 +446,7 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) buffer); pipe_buffer_reference(&setup->constants.current, buffer); @@ -454,7 +458,7 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %f\n", __FUNCTION__, alpha_ref_value); if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; -- cgit v1.2.3 From 0e042bed49c51fef38b02b7cc05efa504f2f703d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:25:46 -0700 Subject: llvmpipe: set LP_SETUP_NEW_FS in lp_setup_set_fs() Fixes progs/trivial/tri-blend.c, but I think we're just getting lucky in this case. --- src/gallium/drivers/llvmpipe/lp_setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 14b40dfe36..142fec4f80 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -440,6 +440,7 @@ lp_setup_set_fs( struct setup_context *setup, /* FIXME: reference count */ setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; + setup->dirty |= LP_SETUP_NEW_FS; } void -- cgit v1.2.3 From 29207a2ae6d3e6bae05621cb924c4a1940ce57fc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:57:13 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 3209e41c01..a3ec82e382 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -103,8 +103,8 @@ struct setup_context { struct { unsigned flags; - union lp_rast_cmd_arg color; - union lp_rast_cmd_arg zstencil; + union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ + union lp_rast_cmd_arg zstencil; /**< lp_rast_clear_zstencil() cmd */ } clear; enum { @@ -117,8 +117,8 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - const struct lp_rast_state *stored; - struct lp_rast_state current; + const struct lp_rast_state *stored; /**< what's in the bins */ + struct lp_rast_state current; /**< currently set state */ } fs; struct { -- cgit v1.2.3 From ffd0759973165368ac8ce07d9bcffeb0acf88e6f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:57:44 -0700 Subject: llvmpipe: comments and a stub for lp_rast_set_state() --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index c8359f45a2..2d319777ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -161,6 +161,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, /** * Clear the rasterizer's current color tile. + * This is a bin command called during bin processing. 
*/ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) @@ -190,6 +191,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, /** * Clear the rasterizer's current z/stencil tile. + * This is a bin command called during bin processing. */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) @@ -204,6 +206,10 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, } +/** + * Load tile color from the framebuffer surface. + * This is a bin command called during bin processing. + */ void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { @@ -212,6 +218,11 @@ void lp_rast_load_color( struct lp_rasterizer *rast, /* call u_tile func to load colors from surface */ } + +/** + * Load tile z/stencil from the framebuffer surface. + * This is a bin command called during bin processing. + */ void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -220,9 +231,25 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* call u_tile func to load depth (and stencil?) from surface */ } + +void lp_rast_set_state( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg arg ) +{ + RAST_DEBUG("%s\n", __FUNCTION__); + + /* XXX to do */ +} + + + /* Within a tile: */ +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle. + * This is a bin command called during bin processing. + */ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -242,6 +269,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, /** * Compute shading for a 4x4 block of pixels. + * This is a bin command called during bin processing. 
*/ void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, -- cgit v1.2.3 From e2f46344560f8f1193b311ad41883011e67eea00 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 16:05:12 -0700 Subject: llvmpipe: checkpoint some initial state binning code --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2d319777ee..d5fe6e9369 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -235,6 +235,8 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { + const struct lp_rast_state *state = arg.set_state; + RAST_DEBUG("%s\n", __FUNCTION__); /* XXX to do */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 142fec4f80..36bd0ad4dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -594,14 +594,24 @@ lp_setup_update_shader_state( struct setup_context *setup ) memcmp(setup->fs.stored, &setup->fs.current, sizeof setup->fs.current) != 0) { - struct lp_rast_state *stored; - - stored = get_data(&setup->data, sizeof *stored); + /* The fs state that's been stored in the bins is different from + * the new, current state. So allocate a new lp_rast_state object + * and append it to the bin's setup data buffer. 
+ */ + struct lp_rast_state *stored = + (struct lp_rast_state *) get_data(&setup->data, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, sizeof setup->fs.current); setup->fs.stored = stored; + +#if 0 + /* put the state-set command into all bins */ + bin_everywhere( setup, + lp_rast_set_state, + *setup->fs.stored ); +#endif } } } -- cgit v1.2.3 From a9be9cd8be0fac4be5d65430749666d2204b79a4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 16:23:52 -0700 Subject: llvmpipe: comments, clean-ups in lp_rast_priv.h --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 723bb5ad69..4c0dfe2282 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -33,8 +33,10 @@ struct pipe_transfer; struct pipe_screen; -/* We can choose whatever layout for the internal tile storage we - * prefer: + +/** + * A tile's color and depth memory. + * We can choose whatever layout for the internal tile storage we prefer. */ struct lp_rast_tile { @@ -44,21 +46,22 @@ struct lp_rast_tile }; -struct lp_rasterizer { +/** + * This is the state required while rasterizing a tile. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ + struct lp_rast_tile tile; /**< Tile color/z/stencil memory */ - /* We can choose whatever layout for the internal tile storage we - * prefer: - */ - struct lp_rast_tile tile; + unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + unsigned width, height; /**< Size of framebuffer, in pixels */ - unsigned x; - unsigned y; boolean clipped_tile; - boolean check_for_clipped_tiles; - unsigned width; - unsigned height; + /* Framebuffer stuff + */ struct pipe_screen *screen; struct pipe_transfer *cbuf_transfer; struct pipe_transfer *zsbuf_transfer; @@ -75,6 +78,8 @@ struct lp_rasterizer { char clear_stencil; } state; + /* Pixel blocks produced during rasterization + */ unsigned nr_blocks; struct { unsigned x; -- cgit v1.2.3 From 4e058f6c4803be5d9d676338d6aee2775b88b87c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:00:22 -0700 Subject: llvmpipe: fix incorrect array indexing when saving blend color --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 36bd0ad4dd..8ef764eb80 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -543,7 +543,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); for (j = 0; j < 16; ++j) - stored[i*4 + j] = c; + stored[i*16 + j] = c; } setup->blend_color.stored = stored; -- cgit v1.2.3 From 30c122a4c90b62ad3d6b7bacb61c04dd38a4b2df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:27:10 -0700 Subject: llvmpipe: new comment in do_triangle_ccw() --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index cf86255406..c21c465a75 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -376,6 +376,11 @@ do_triangle_ccw(struct setup_context *setup, } } + /* + * All fields of 'tri' are now set. The remaining code here is + * concerned with binning. + */ + /* Convert to tile coordinates: */ minx = tri->minx / TILE_SIZE; -- cgit v1.2.3 From f492edee98e75bb4bace5469bfcdbff1a33609b1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:27:37 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a3ec82e382..b502f00eea 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -77,11 +77,19 @@ struct data_block_list { }; +/** + * Point/line/triangle setup context. + * Note: "stored" below indicates data which is stored in the bins, + * not arbitrary malloc'd memory. + */ struct setup_context { struct lp_rasterizer *rast; - /* When there are multiple threads, will want to double-buffer the + /** + * Per-bin data goes into the 'tile' cmd_block_lists. + * Shared bin data goes into the 'data' buffer. 
+ * When there are multiple threads, will want to double-buffer the * bin arrays: */ struct cmd_block_list tile[TILES_X][TILES_Y]; @@ -121,6 +129,7 @@ struct setup_context { struct lp_rast_state current; /**< currently set state */ } fs; + /** fragment shader constants */ struct { struct pipe_buffer *current; unsigned stored_size; @@ -132,7 +141,7 @@ struct setup_context { uint8_t *stored; } blend_color; - unsigned dirty; + unsigned dirty; /**< bitmask of LP_SETUP_x bits */ void (*point)( struct setup_context *, const float (*v0)[4]); @@ -163,7 +172,6 @@ void lp_setup_new_cmd_block( struct cmd_block_list *list ); static INLINE void *get_data( struct data_block_list *list, unsigned size) { - if (list->tail->used + size > DATA_BLOCK_SIZE) { lp_setup_new_data_block( list ); } @@ -189,7 +197,6 @@ static INLINE void *get_data_aligned( struct data_block_list *list, unsigned size, unsigned alignment ) { - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { lp_setup_new_data_block( list ); } -- cgit v1.2.3 From 9c1debe208d07b57e88c65bae186bb339de7dee7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:28:02 -0700 Subject: llvmpipe: comment about blend color --- src/gallium/drivers/llvmpipe/lp_setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8ef764eb80..1f303d7705 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -540,6 +540,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) stored = get_data_aligned(&setup->data, 4 * 16, 16); + /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); for (j = 0; j < 16; ++j) -- cgit v1.2.3 From c6057ab8ff8991eac9c73c267696b386f8e56c68 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:28:48 -0700 Subject: llvmpipe: comments regarding 
lp_rast_triangle --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a50b73b27f..ab21a77834 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -75,8 +75,11 @@ struct lp_rast_shader_inputs { }; -/* Rasterization information for a triangle known to be in this bin, +/** + * Rasterization information for a triangle known to be in this bin, * plus inputs to run the shader: + * These fields are tile- and bin-independent. + * Objects of this type are put into the setup_context::data buffer. */ struct lp_rast_triangle { /* bounding box of tri (in pixels) */ -- cgit v1.2.3 From 04e12e31b252e7a18862c3ac386f302665edb6e7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 09:14:40 -0700 Subject: llvmpipe: dynamic allocation of triangle a0/dadx/dady arrays Much less memory per triangle now. --- src/gallium/drivers/llvmpipe/lp_rast.h | 10 ++++------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index ab21a77834..435993d44d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -64,14 +64,12 @@ struct lp_rast_shader_inputs { const struct lp_rast_state *state; /* Attribute interpolation: - * * First coefficient is position. - * - * FIXME: reduce memory waste! + * These pointers point into the bin data buffer. 
*/ - float a0[1 + PIPE_MAX_SHADER_INPUTS][4]; - float dadx[1 + PIPE_MAX_SHADER_INPUTS][4]; - float dady[1 + PIPE_MAX_SHADER_INPUTS][4]; + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index c21c465a75..3b71bc4c03 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -177,6 +177,16 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned slot; + /* Allocate space for the a0, dadx and dady arrays + */ + { + unsigned bytes; + bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); + tri->inputs.a0 = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.dadx = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.dady = get_data_aligned( &setup->data, bytes, 16 ); + } + /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, 0, v1, v2, v3); -- cgit v1.2.3 From 1796ffd3bcf74a94c800717e77abaf9902c50b4d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 09:19:09 -0700 Subject: llvmpipe: fix typo, whitespace --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 3b71bc4c03..74ed0a9e8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -117,7 +117,6 @@ static void perspective_coef( struct lp_rast_triangle *tri, float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - @@ -191,7 +190,7 @@ static void setup_tri_coefficients( struct setup_context *setup, */ setup_fragcoord_coef(tri, 0, v1, v2, v3); - /* setup interpolation for all the remaining 
attrbutes: + /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; -- cgit v1.2.3 From 9dca0100489c7a7c02af77da42a39dbe1560d7e2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 10:41:38 -0700 Subject: llvmpipe: struct cmd_bin Just introducing a new structure to represent a per-tile bin. --- src/gallium/drivers/llvmpipe/lp_setup.c | 11 ++++++----- src/gallium/drivers/llvmpipe/lp_setup_context.h | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 1f303d7705..fc7f4f6778 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -111,7 +111,7 @@ static void reset_context( struct setup_context *setup ) */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = &setup->tile[i][j]; + struct cmd_block_list *list = &setup->tile[i][j].commands; struct cmd_block *block; struct cmd_block *tmp; @@ -173,9 +173,10 @@ static void bin_everywhere( struct setup_context *setup, /** Rasterize commands for a single bin */ static void rasterize_bin( struct lp_rasterizer *rast, - struct cmd_block_list *commands, + const struct cmd_bin *bin, int x, int y) { + const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; @@ -666,7 +667,7 @@ lp_setup_destroy( struct setup_context *setup ) for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - FREE(setup->tile[i][j].head); + FREE(setup->tile[i][j].commands.head); FREE(setup->data.head); @@ -691,8 +692,8 @@ lp_setup_create( struct pipe_screen *screen ) for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].head = - setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->tile[i][j].commands.head = + setup->tile[i][j].commands.tail = 
CALLOC_STRUCT(cmd_block); setup->data.head = setup->data.tail = CALLOC_STRUCT(data_block); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index b502f00eea..1715048f76 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -71,11 +71,20 @@ struct cmd_block_list { struct cmd_block *tail; }; +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; + struct lp_rast_state *curr_state; +}; + + struct data_block_list { struct data_block *head; struct data_block *tail; }; - + /** * Point/line/triangle setup context. @@ -87,12 +96,12 @@ struct setup_context { struct lp_rasterizer *rast; /** - * Per-bin data goes into the 'tile' cmd_block_lists. + * Per-bin data goes into the 'tile' bins. * Shared bin data goes into the 'data' buffer. * When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list tile[TILES_X][TILES_Y]; + struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; /* size of framebuffer, in tiles */ @@ -212,10 +221,12 @@ static INLINE void *get_data_aligned( struct data_block_list *list, /* Add a command to a given bin. */ -static INLINE void bin_command( struct cmd_block_list *list, +static INLINE void bin_command( struct cmd_bin *bin, lp_rast_cmd cmd, union lp_rast_cmd_arg arg ) { + struct cmd_block_list *list = &bin->commands; + if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); } -- cgit v1.2.3 From b1659b9213f3eeee440590dfe379f0d193948307 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 11:50:40 -0700 Subject: llvmpipe: bin state-change commands Previously, each triangle had a pointer to the state to use for shading. Now we insert state-change commands into the bins. 
When we execute one of those commands we just update a 'current state' pointer and use that pointer when calling the jit shader. When inserting state-change commands into a bin we check if the previous command was also a state-change command and simply replace it. This avoids accumulating useless/redundant state-change commands. --- src/gallium/drivers/llvmpipe/lp_rast.c | 9 +- src/gallium/drivers/llvmpipe/lp_rast.h | 15 ++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 + src/gallium/drivers/llvmpipe/lp_setup.c | 113 +++++++++++++++++------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 - 6 files changed, 101 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d5fe6e9369..8f37a28e87 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -237,9 +237,10 @@ void lp_rast_set_state( struct lp_rasterizer *rast, { const struct lp_rast_state *state = arg.set_state; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %p\n", __FUNCTION__, (void *) state); - /* XXX to do */ + /* just set the current state pointer for this rasterizer */ + rast->current_state = state; } @@ -279,7 +280,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned mask) { #if 1 - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = rast->current_state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; @@ -287,6 +288,8 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned ix, iy; int block_offset; + assert(state); + /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 435993d44d..e9a1fa49ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -55,18 +55,13 @@ 
struct lp_rast_state { }; -/* Coefficients necessary to run the shader at a given location: + +/** + * Coefficients necessary to run the shader at a given location. + * First coefficient is position. + * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - - /* Current rasterizer state: - */ - const struct lp_rast_state *state; - - /* Attribute interpolation: - * First coefficient is position. - * These pointers point into the bin data buffer. - */ float (*a0)[4]; float (*dadx)[4]; float (*dady)[4]; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4c0dfe2282..98111edff7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -86,6 +86,8 @@ struct lp_rasterizer unsigned y; unsigned mask; } blocks[256]; + + const struct lp_rast_state *current_state; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index fc7f4f6778..11a9fd2637 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -155,6 +155,34 @@ static void reset_context( struct setup_context *setup ) } +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. + */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + /* Add a command to all active bins. @@ -170,6 +198,32 @@ static void bin_everywhere( struct setup_context *setup, } +/** + * Put a state-change command into all bins. 
+ * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. + */ +static void +bin_state_command( struct setup_context *setup, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { + struct cmd_bin *bin = &setup->tile[i][j]; + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + bin_command( bin, cmd, arg ); + } + } + } +} + + /** Rasterize commands for a single bin */ static void rasterize_bin( struct lp_rasterizer *rast, @@ -234,31 +288,6 @@ begin_binning( struct setup_context *setup ) { SETUP_DEBUG("%s\n", __FUNCTION__); - if (!setup->fb.cbuf && !setup->fb.zsbuf) { - setup->fb.width = 0; - setup->fb.height = 0; - } - else if (!setup->fb.zsbuf) { - setup->fb.width = setup->fb.cbuf->width; - setup->fb.height = setup->fb.cbuf->height; - } - else if (!setup->fb.cbuf) { - setup->fb.width = setup->fb.zsbuf->width; - setup->fb.height = setup->fb.zsbuf->height; - } - else { - /* XXX: not sure what we're really supposed to do for - * mis-matched color & depth buffer sizes. 
- */ - setup->fb.width = MIN2(setup->fb.cbuf->width, - setup->fb.zsbuf->width); - setup->fb.height = MIN2(setup->fb.cbuf->height, - setup->fb.zsbuf->height); - } - - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; - if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, @@ -352,8 +381,34 @@ lp_setup_bind_framebuffer( struct setup_context *setup, pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); + + if (!setup->fb.cbuf && !setup->fb.zsbuf) { + setup->fb.width = 0; + setup->fb.height = 0; + } + else if (!setup->fb.zsbuf) { + setup->fb.width = setup->fb.cbuf->width; + setup->fb.height = setup->fb.cbuf->height; + } + else if (!setup->fb.cbuf) { + setup->fb.width = setup->fb.zsbuf->width; + setup->fb.height = setup->fb.zsbuf->height; + } + else { + /* XXX: not sure what we're really supposed to do for + * mis-matched color & depth buffer sizes. 
+ */ + setup->fb.width = MIN2(setup->fb.cbuf->width, + setup->fb.zsbuf->width); + setup->fb.height = MIN2(setup->fb.cbuf->height, + setup->fb.zsbuf->height); + } + + setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; } + void lp_setup_clear( struct setup_context *setup, const float *color, @@ -608,12 +663,10 @@ lp_setup_update_shader_state( struct setup_context *setup ) sizeof setup->fs.current); setup->fs.stored = stored; -#if 0 /* put the state-set command into all bins */ - bin_everywhere( setup, - lp_rast_set_state, - *setup->fs.stored ); -#endif + bin_state_command( setup, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 1715048f76..7c7c34f3f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -76,10 +76,14 @@ struct cmd_block_list { */ struct cmd_bin { struct cmd_block_list commands; - struct lp_rast_state *curr_state; }; +/** + * This stores bulk data which is shared by all bins. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. 
+ */ struct data_block_list { struct data_block *head; struct data_block *tail; @@ -241,5 +245,4 @@ static INLINE void bin_command( struct cmd_bin *bin, } - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 74ed0a9e8f..48733a599b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -295,8 +295,6 @@ do_triangle_ccw(struct setup_context *setup, return; } - tri->inputs.state = setup->fs.stored; - /* */ tri->oneoverarea = ((float)FIXED_ONE) / (float)area; -- cgit v1.2.3 From 5c7d1b592ad9ce9e7ee36610f17d41e5c2881d54 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 11:58:24 -0700 Subject: llvmpipe: remove lp_rast_triangle::oneoverarea field Makes lp_rast_triangle a little smaller (now 280 bytes on a 32-bit system). --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 ----- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 32 ++++++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e9a1fa49ad..a119b089bd 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -108,11 +108,6 @@ struct lp_rast_triangle { int step[3][16]; - /* XXX: this is only used inside lp_setup_tri.c, don't really - * need it here: - */ - float oneoverarea; - /* inputs for the shader */ struct lp_rast_shader_inputs inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 48733a599b..56a32d0ac0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -54,6 +54,7 @@ static void constant_coef( struct lp_rast_triangle *tri, * for a triangle. 
*/ static void linear_coef( struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -67,8 +68,8 @@ static void linear_coef( struct lp_rast_triangle *tri, float da12 = a1 - a2; float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -100,6 +101,7 @@ static void linear_coef( struct lp_rast_triangle *tri, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -114,8 +116,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, float a3 = v3[vert_attr][i] * v3[0][3]; float da12 = a1 - a2; float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -133,6 +135,7 @@ static void perspective_coef( struct lp_rast_triangle *tri, */ static void setup_fragcoord_coef(struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -147,9 +150,9 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri, tri->inputs.dadx[slot][1] = 0.0; tri->inputs.dady[slot][1] = 1.0; /*Z*/ - linear_coef(tri, slot, v1, v2, v3, 0, 2); + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); /*W*/ - linear_coef(tri, slot, v1, v2, v3, 0, 3); + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); } @@ -169,10 
+172,11 @@ static void setup_facing_coef( struct lp_rast_triangle *tri, */ static void setup_tri_coefficients( struct setup_context *setup, struct lp_rast_triangle *tri, + float oneoverarea, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], - boolean frontface ) + boolean frontface) { unsigned slot; @@ -188,7 +192,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, 0, v1, v2, v3); + setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); /* setup interpolation for all the remaining attributes: */ @@ -204,18 +208,18 @@ static void setup_tri_coefficients( struct setup_context *setup, case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_POSITION: /* XXX: fix me - duplicates the values in slot zero. 
*/ - setup_fragcoord_coef(tri, slot+1, v1, v2, v3); + setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: @@ -260,7 +264,7 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); - float area; + float area, oneoverarea; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -297,11 +301,11 @@ do_triangle_ccw(struct setup_context *setup, /* */ - tri->oneoverarea = ((float)FIXED_ONE) / (float)area; + oneoverarea = ((float)FIXED_ONE) / (float)area; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. -- cgit v1.2.3 From d9dc3d59760a28d54013d3d164f61d85ec807651 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 12:54:37 -0700 Subject: llvmpipe: move bin-related structures and functions into new lp_bin.[ch] And put lp_ prefixes on some functions. 
--- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 7 +- src/gallium/drivers/llvmpipe/lp_bin.c | 51 ++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 167 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 5 + src/gallium/drivers/llvmpipe/lp_setup.c | 27 +--- src/gallium/drivers/llvmpipe/lp_setup_context.h | 116 +--------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 18 +-- 8 files changed, 243 insertions(+), 149 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bin.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bin.h diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index bfe34396d9..0a5d1b9f1b 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,6 +6,7 @@ LIBNAME = llvmpipe CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ + lp_bin.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3530e739cc..4aef338735 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -19,6 +19,7 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ + 'lp_bin.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', @@ -46,7 +47,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', - 'lp_query.c', + 'lp_query.c', 'lp_setup.c', 'lp_setup_tri.c', 'lp_setup_line.c', @@ -62,8 +63,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_rast.c', - 'lp_rast_tri.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c new file mode 100644 index 0000000000..f43cdcbf3d --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin.c 
@@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "lp_bin.h" + + +void +lp_bin_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} + + +void +lp_bin_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h new file mode 100644 index 0000000000..fa25d78631 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -0,0 +1,167 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building bins while + * The "rast" code is concerned with consuming/executing bins. + */ + +#ifndef LP_BIN_H +#define LP_BIN_H + +#include "lp_rast.h" + + +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + + + +/* switch to a non-pointer value for this: + */ +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); + +struct cmd_block { + lp_rast_cmd cmd[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; + unsigned count; + struct cmd_block *next; +}; + +struct data_block { + ubyte data[DATA_BLOCK_SIZE]; + unsigned used; + struct data_block *next; +}; + +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; +}; + + +/** + * This stores bulk data which is shared by all bins. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. + */ +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + + + +extern void lp_bin_new_data_block( struct data_block_list *list ); + +extern void lp_bin_new_cmd_block( struct cmd_block_list *list ); + + +/** + * Allocate space for a command/data in the given block list. + * Grow the block list if needed. 
+ */ +static INLINE void * +lp_bin_alloc( struct data_block_list *list, unsigned size) +{ + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + + +/** + * As above, but with specific alignment. + */ +static INLINE void * +lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, + unsigned alignment ) +{ + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + + +/* Put back data if we decide not to use it, eg. culled triangles. + */ +static INLINE void +lp_bin_putback_data( struct data_block_list *list, unsigned size) +{ + assert(list->tail->used >= size); + list->tail->used -= size; +} + + +/* Add a command to a given bin. 
+ */ +static INLINE void +lp_bin_command( struct cmd_bin *bin, + lp_rast_cmd cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_block_list *list = &bin->commands; + + if (list->tail->count == CMD_BLOCK_MAX) { + lp_bin_new_cmd_block( list ); + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + +#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a119b089bd..307c45cb9f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -43,6 +43,11 @@ struct pipe_screen; #define FIXED_ONE (1<tail->next = block; - list->tail = block; - block->next = NULL; - block->count = 0; -} - -void lp_setup_new_data_block( struct data_block_list *list ) -{ - struct data_block *block = MALLOC_STRUCT(data_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->used = 0; -} static void @@ -194,7 +177,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_command( &setup->tile[i][j], cmd, arg ); + lp_bin_command( &setup->tile[i][j], cmd, arg ); } @@ -217,7 +200,7 @@ bin_state_command( struct setup_context *setup, lp_replace_last_command_arg(bin, arg); } else { - bin_command( bin, cmd, arg ); + lp_bin_command( bin, cmd, arg ); } } } @@ -594,7 +577,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = get_data_aligned(&setup->data, 4 * 16, 16); + stored = lp_bin_alloc_aligned(&setup->data, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -626,7 +609,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = get_data(&setup->data, current_size); + stored = lp_bin_alloc(&setup->data, current_size); 
if(stored) { memcpy(stored, current_data, @@ -656,7 +639,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) get_data(&setup->data, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(&setup->data, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 7c7c34f3f7..5abe66f586 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -31,6 +31,7 @@ #include "lp_setup.h" #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ +#include "lp_bin.h" /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -40,56 +41,12 @@ #define TILES_X (MAXWIDTH / TILE_SIZE) #define TILES_Y (MAXHEIGHT / TILE_SIZE) -#define CMD_BLOCK_MAX 128 -#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) - #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 #define LP_SETUP_NEW_BLEND_COLOR 0x04 -/* switch to a non-pointer value for this: - */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); - -struct cmd_block { - lp_rast_cmd cmd[CMD_BLOCK_MAX]; - union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; - unsigned count; - struct cmd_block *next; -}; - -struct data_block { - ubyte data[DATA_BLOCK_SIZE]; - unsigned used; - struct data_block *next; -}; - -struct cmd_block_list { - struct cmd_block *head; - struct cmd_block *tail; -}; - -/** - * For each screen tile we have one of these bins. - */ -struct cmd_bin { - struct cmd_block_list commands; -}; - - -/** - * This stores bulk data which is shared by all bins. - * Examples include triangle data and state data. The commands in - * the per-tile bins will point to chunks of data in this structure. 
- */ -struct data_block_list { - struct data_block *head; - struct data_block *tail; -}; - - /** * Point/line/triangle setup context. * Note: "stored" below indicates data which is stored in the bins, @@ -174,75 +131,4 @@ void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); -void lp_setup_new_data_block( struct data_block_list *list ); -void lp_setup_new_cmd_block( struct cmd_block_list *list ); - - -/** - * Allocate space for a command/data in the given block list. - * Grow the block list if needed. - */ -static INLINE void *get_data( struct data_block_list *list, - unsigned size) -{ - if (list->tail->used + size > DATA_BLOCK_SIZE) { - lp_setup_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - tail->used += size; - return data; - } -} - -/* Put back data if we decide not to use it, eg. culled triangles. - */ -static INLINE void putback_data( struct data_block_list *list, - unsigned size) -{ - list->tail->used -= size; -} - - -static INLINE void *get_data_aligned( struct data_block_list *list, - unsigned size, - unsigned alignment ) -{ - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - lp_setup_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; - tail->used += offset + size; - return data + offset; - } -} - -/* Add a command to a given bin. 
- */ -static INLINE void bin_command( struct cmd_bin *bin, - lp_rast_cmd cmd, - union lp_rast_cmd_arg arg ) -{ - struct cmd_block_list *list = &bin->commands; - - if (list->tail->count == CMD_BLOCK_MAX) { - lp_setup_new_cmd_block( list ); - } - - { - struct cmd_block *tail = list->tail; - unsigned i = tail->count; - tail->cmd[i] = cmd; - tail->arg[i] = arg; - tail->count++; - } -} - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 56a32d0ac0..5e53b4050e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -185,9 +185,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = get_data_aligned( &setup->data, bytes, 16 ); - tri->inputs.dadx = get_data_aligned( &setup->data, bytes, 16 ); - tri->inputs.dady = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +263,7 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); + struct lp_rast_triangle *tri = lp_bin_alloc( &setup->data, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +283,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->data, sizeof *tri ); return; } @@ -295,7 +295,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->data, sizeof *tri ); return; } @@ -405,7 +405,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_bin_command( &setup->tile[minx][miny], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,7 +464,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - bin_command( &setup->tile[x][y], + lp_bin_command( &setup->tile[x][y], lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } @@ -472,7 +472,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* shade partial tile */ - bin_command( &setup->tile[x][y], + lp_bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From 49a720c5cdfb500c323ae2411b39f4609d14f021 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 12:59:34 -0700 Subject: llvmpipe: comments and minor clean-ups --- src/gallium/drivers/llvmpipe/lp_rast.h | 27 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 307c45cb9f..21bbf104b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -25,12 +25,22 @@ * **************************************************************************/ +/** + * The rast code is concerned with rasterization of command bins. + * Each screen tile has a bin associated with it. 
To render the + * scene we iterate over the tile bins and execute the commands + * in each bin. + * We'll do that with multiple threads... + */ + + #ifndef LP_RAST_H #define LP_RAST_H #include "pipe/p_compiler.h" #include "lp_jit.h" + /* Initially create and program a single rasterizer directly. Later * will want multiple of these, one or two per core. At that stage * will probably pass command buffers into the rasterizers rather than @@ -57,7 +67,6 @@ struct lp_rast_state { * the tile color/z/stencil data somehow: */ lp_jit_frag_func jit_function; - }; @@ -121,6 +130,9 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); +void lp_rast_destroy( struct lp_rasterizer * ); + + boolean lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, @@ -147,6 +159,7 @@ union lp_rast_cmd_arg { unsigned clear_zstencil; }; + /* Cast wrappers. Hopefully these compile to noops! */ static INLINE const union lp_rast_cmd_arg @@ -183,10 +196,12 @@ lp_rast_arg_null( void ) - - -/* Binnable Commands: +/** + * Binnable Commands. + * These get put into bins by the setup code and are called when + * the bins are executed. */ + void lp_rast_clear_color( struct lp_rasterizer *, const union lp_rast_cmd_arg ); @@ -214,9 +229,5 @@ void lp_rast_shade_tile( struct lp_rasterizer *, void lp_rast_end_tile( struct lp_rasterizer *rast ); -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer * ); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5abe66f586..180b8f6e88 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -25,6 +25,13 @@ * **************************************************************************/ + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. 
+ */ + + #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -- cgit v1.2.3 From 24a3b0d23a93378d77198f1c92f6f381c0ad05b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 13:01:03 -0700 Subject: llvmpipe: add missing sources to Makefile --- src/gallium/drivers/llvmpipe/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 0a5d1b9f1b..6ff45d0f05 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,8 @@ C_SOURCES = \ lp_rast.c \ lp_rast_tri.c \ lp_setup.c \ + lp_setup_line.c \ + lp_setup_point.c \ lp_setup_tri.c \ lp_query.c \ lp_screen.c \ -- cgit v1.2.3 From 15a2a588d88ae02e575ff1ef9287c789ebdadead Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 13:53:28 -0700 Subject: llvmpipe: fix-up polygon culling/winding --- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b845..282ed2e9ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_state.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -50,6 +51,16 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + /* Note: we can immediately set the triangle state here and + * not worry about binning because we handle culling during + * triangle setup, not when rasterizing the bins. 
+ */ + if (llvmpipe->rasterizer) { + lp_setup_set_triangle_state( llvmpipe->setup, + llvmpipe->rasterizer->cull_mode, + llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW ); + } + llvmpipe->dirty |= LP_NEW_RASTERIZER; } -- cgit v1.2.3 From a08d6302168341001003da32d42cfcff2311fa04 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:11:25 -0700 Subject: llvmpipe: use LP_DBG() macro everywhere --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 +++++++++--------- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 ++- src/gallium/drivers/llvmpipe/lp_setup.c | 40 +++++++++++++++--------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 8f37a28e87..5891a2a706 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -27,13 +27,13 @@ #include "util/u_memory.h" +#include "lp_debug.h" #include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" -#define RAST_DEBUG debug_printf struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { @@ -65,7 +65,7 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, { struct pipe_screen *screen = rast->screen; - RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + LP_DBG(DEBUG_RAST, "%s %dx%d\n", __FUNCTION__, width, height); pipe_surface_reference(&rast->state.cbuf, cbuf); pipe_surface_reference(&rast->state.zsbuf, zsbuf); @@ -152,7 +152,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { - RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); rast->x = x; rast->y = y; @@ -168,7 +168,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], 
clear_color[1], clear_color[2], @@ -198,7 +198,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; - RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); + LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -213,7 +213,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* call u_tile func to load colors from surface */ } @@ -226,7 +226,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* call u_tile func to load depth (and stencil?) from surface */ } @@ -237,7 +237,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast, { const struct lp_rast_state *state = arg.set_state; - RAST_DEBUG("%s %p\n", __FUNCTION__, (void *) state); + LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ rast->current_state = state; @@ -260,7 +260,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const unsigned mask = ~0; unsigned x, y; - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* Use the existing preference for 4x4 (four quads) shading: */ @@ -398,7 +398,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) if (y + h > rast->height) h -= y + h - rast->height; - RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tile.color, @@ -440,7 +440,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) if (y + h > rast->height) h -= y + h - rast->height; - 
RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); lp_tile_write_z32(rast->tile.depth, @@ -455,7 +455,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) */ void lp_rast_end_tile( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) lp_rast_store_color(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index e772a0158a..81a9c1c142 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -30,6 +30,7 @@ */ #include "util/u_math.h" +#include "lp_debug.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -185,7 +186,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); - debug_printf("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); rast->nr_blocks = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6136d1b57e..7f31df6ae5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -37,12 +37,12 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "lp_debug.h" #include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" #include "lp_setup_context.h" -#define SETUP_DEBUG debug_printf static void set_state( struct setup_context *, unsigned ); @@ -82,7 +82,7 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* Reset derived state */ setup->constants.stored_size = 0; @@ -238,7 +238,7 @@ rasterize_bins( struct setup_context *setup, struct lp_rasterizer *rast = setup->rast; unsigned i, j; - 
SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, setup->fb.cbuf, @@ -261,7 +261,7 @@ rasterize_bins( struct setup_context *setup, reset_context( setup ); - SETUP_DEBUG("%s done \n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -269,7 +269,7 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) @@ -289,7 +289,7 @@ begin_binning( struct setup_context *setup ) bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } - SETUP_DEBUG("%s done\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); } @@ -301,7 +301,7 @@ begin_binning( struct setup_context *setup ) static void execute_clears( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); rasterize_bins( setup, TRUE ); @@ -317,7 +317,7 @@ set_state( struct setup_context *setup, if (old_state == new_state) return; - SETUP_DEBUG("%s old %d new %d\n", __FUNCTION__, old_state, new_state); + LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state); switch (new_state) { case SETUP_ACTIVE: @@ -347,7 +347,7 @@ void lp_setup_flush( struct setup_context *setup, unsigned flags ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); } @@ -358,7 +358,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); @@ -401,7 +401,7 @@ lp_setup_clear( struct setup_context *setup, { unsigned i; - SETUP_DEBUG("%s state %d\n", __FUNCTION__, setup->state); + LP_DBG(DEBUG_SETUP, "%s state %d\n", 
__FUNCTION__, setup->state); if (flags & PIPE_CLEAR_COLOR) { @@ -451,7 +451,7 @@ lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; @@ -465,7 +465,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { - SETUP_DEBUG("%s %p %u\n", __FUNCTION__, (void *) input, nr); + LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; @@ -475,7 +475,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { - SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) fs); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) fs); /* FIXME: reference count */ setup->fs.current.jit_function = fs ? fs->current->jit_function : NULL; @@ -486,7 +486,7 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) buffer); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); pipe_buffer_reference(&setup->constants.current, buffer); @@ -498,7 +498,7 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { - SETUP_DEBUG("%s %f\n", __FUNCTION__, alpha_ref_value); + LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; @@ -510,7 +510,7 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(blend_color); @@ -527,7 +527,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, struct pipe_texture *dummy; 
unsigned i; - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(num <= PIPE_MAX_SAMPLERS); @@ -569,7 +569,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(setup->fs.current.jit_function); @@ -685,7 +685,7 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); -- cgit v1.2.3 From 9fca3e065b9ab5ef1389a76934bc24ed2b287a76 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:22:08 -0700 Subject: llvmpipe: simplify framebuffer state code --- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_setup.h | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +----- src/gallium/drivers/llvmpipe/lp_state_surface.c | 4 +--- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 7f31df6ae5..38609ec88a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -241,12 +241,12 @@ rasterize_bins( struct setup_context *setup, LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, - setup->fb.cbuf, - setup->fb.zsbuf, - setup->fb.cbuf != NULL, - setup->fb.zsbuf != NULL && write_depth, - setup->fb.width, - setup->fb.height ); + setup->fb->cbufs[0], + setup->fb->zsbuf, + setup->fb->cbufs[0] != NULL, + setup->fb->zsbuf != NULL && write_depth, + setup->fb->width, + setup->fb->height ); /* loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { @@ -271,7 +271,7 @@ begin_binning( struct setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", 
__FUNCTION__); - if (setup->fb.cbuf) { + if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, @@ -280,7 +280,7 @@ begin_binning( struct setup_context *setup ) bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); } - if (setup->fb.zsbuf) { + if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, @@ -355,13 +355,13 @@ lp_setup_flush( struct setup_context *setup, void lp_setup_bind_framebuffer( struct setup_context *setup, - struct pipe_surface *color, - struct pipe_surface *zstencil ) + const struct pipe_framebuffer_state *fb ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); +#if 0 pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); @@ -386,9 +386,14 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb.height = MIN2(setup->fb.cbuf->height, setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; +#else + setup->fb = fb; + setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; +#endif + } @@ -411,7 +416,7 @@ lp_setup_clear( struct setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, + util_pack_z_stencil(setup->fb->zsbuf->format, depth, stencil); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 1edd7410fc..66a7f29f1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -53,7 +53,7 @@ struct pipe_surface; struct pipe_buffer; struct pipe_blend_color; struct pipe_screen; -struct setup_context; +struct pipe_framebuffer_state; struct lp_fragment_shader; struct 
lp_jit_context; @@ -90,8 +90,7 @@ lp_setup_flush( struct setup_context *setup, void lp_setup_bind_framebuffer( struct setup_context *setup, - struct pipe_surface *color, - struct pipe_surface *zstencil ); + const struct pipe_framebuffer_state *fb ); void lp_setup_set_triangle_state( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 180b8f6e88..dc12eb7847 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -79,12 +79,7 @@ struct setup_context { boolean ccw_is_frontface; unsigned cullmode; - struct { - struct pipe_surface *cbuf; - struct pipe_surface *zsbuf; - unsigned width; - unsigned height; - } fb; + const struct pipe_framebuffer_state *fb; struct { unsigned flags; diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 909ca9f117..3eff40e3f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -83,9 +83,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_bind_framebuffer( lp->setup, - fb->cbufs[0], - fb->zsbuf ); + lp_setup_bind_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From b533b56750aca8c7e8cb22af93a0fc2a0cfc0d97 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:47:40 -0700 Subject: llvmpipe: move lp_rasterize_bin() into lp_rast.c First step of moving bin rasterization/execution code out of lp_setup.c --- src/gallium/drivers/llvmpipe/lp_rast.c | 37 +++++++++++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 18 ++++++---------- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++----------------------- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5891a2a706..a466aec379 
100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,6 +33,7 @@ #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" +#include "lp_bin.h" struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) @@ -148,9 +149,9 @@ void lp_rast_end( struct lp_rasterizer *rast ) * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ -void lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned x, - unsigned y ) +static void +lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned x, unsigned y ) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); @@ -453,7 +454,8 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) /** * Write the rasterizer's tiles to the framebuffer. */ -void lp_rast_end_tile( struct lp_rasterizer *rast ) +static void +lp_rast_end_tile( struct lp_rasterizer *rast ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -465,6 +467,33 @@ void lp_rast_end_tile( struct lp_rasterizer *rast ) } +/** + * Rasterize commands for a single bin. + * Must be called between lp_rast_begin() and lp_rast_end(). 
+ */ +void +lp_rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y) +{ + const struct cmd_block_list *commands = &bin->commands; + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); + } + } + + lp_rast_end_tile( rast ); +} + + + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 21bbf104b1..3d2388b894 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -47,6 +47,7 @@ * individual function calls like this. */ struct lp_rasterizer; +struct cmd_bin; struct pipe_screen; #define FIXED_ORDER 4 @@ -141,14 +142,13 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, unsigned width, unsigned height ); -void lp_rast_end( struct lp_rasterizer * ); +void +lp_rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y); -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ); +void lp_rast_end( struct lp_rasterizer * ); union lp_rast_cmd_arg { @@ -224,10 +224,4 @@ void lp_rast_shade_tile( struct lp_rasterizer *, const union lp_rast_cmd_arg ); -/* End of tile: - */ - -void lp_rast_end_tile( struct lp_rasterizer *rast ); - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 38609ec88a..47d2ac8e11 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -207,29 +207,6 @@ bin_state_command( struct setup_context *setup, } -/** Rasterize commands for a single bin */ -static void -rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y) -{ - const 
struct cmd_block_list *commands = &bin->commands; - struct cmd_block *block; - unsigned k; - - lp_rast_start_tile( rast, x, y ); - - /* simply execute each of the commands in the block list */ - for (block = commands->head; block; block = block->next) { - for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); - } - } - - lp_rast_end_tile( rast ); -} - - /** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, @@ -251,9 +228,9 @@ rasterize_bins( struct setup_context *setup, /* loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - rasterize_bin( rast, &setup->tile[i][j], - i * TILE_SIZE, - j * TILE_SIZE ); + lp_rasterize_bin( rast, &setup->tile[i][j], + i * TILE_SIZE, + j * TILE_SIZE ); } } -- cgit v1.2.3 From 01b1900084152dbacd4025a31ced25f75666ce59 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:31:09 -0700 Subject: llvmpipe: reorganization of binning data structures and functions New lp_bins struct contains all bin information. Move more bin-related code into lp_bin.[ch] Use new/updated bin-access functions to hide implementation details. The result is more/cleaner separation between the setup and rast components. This will make double-buffering of the bins easier, etc.
--- src/gallium/drivers/llvmpipe/lp_bin.c | 78 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 61 +++++++++++++--- src/gallium/drivers/llvmpipe/lp_rast.c | 51 ++++++++++++-- src/gallium/drivers/llvmpipe/lp_rast.h | 21 ++---- src/gallium/drivers/llvmpipe/lp_setup.c | 93 ++++--------------------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 17 +---- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 +++---- 7 files changed, 209 insertions(+), 138 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index f43cdcbf3d..1f05416b3e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -29,6 +29,84 @@ #include "lp_bin.h" +void +lp_init_bins(struct lp_bins *bins) +{ + unsigned i, j; + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + + bins->data.head = + bins->data.tail = CALLOC_STRUCT(data_block); +} + + +void +lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y) +{ + unsigned i, j; + + /* Free all but last binner command lists: + */ + for (i = 0; i < tiles_x; i++) { + for (j = 0; j < tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; + } + } + + /* Free all but last binned data block: + */ + { + struct data_block_list *list = &bins->data; + struct data_block *block, *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->used = 0; + } +} + + +void +lp_free_bin_data(struct 
lp_bins *bins) +{ + unsigned i, j; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + /* lp_reset_bins() should have been already called */ + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; + } + + FREE(bins->data.head); + bins->data.head = NULL; +} + + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index fa25d78631..4d12b93274 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -35,9 +35,19 @@ #ifndef LP_BIN_H #define LP_BIN_H +#include "lp_tile_soa.h" #include "lp_rast.h" +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) + + #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) @@ -84,19 +94,40 @@ struct data_block_list { }; +/** + * All bins and bin data are contained here. + * Per-bin data goes into the 'tile' bins. + * Shared bin data goes into the 'data' buffer. 
+ * When there are multiple threads, will want to double-buffer the + * bin arrays: + */ +struct lp_bins { + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; +}; + + + +void lp_init_bins(struct lp_bins *bins); -extern void lp_bin_new_data_block( struct data_block_list *list ); +void lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y); -extern void lp_bin_new_cmd_block( struct cmd_block_list *list ); +void lp_free_bin_data(struct lp_bins *bins); + +void lp_bin_new_data_block( struct data_block_list *list ); + +void lp_bin_new_cmd_block( struct cmd_block_list *list ); /** - * Allocate space for a command/data in the given block list. + * Allocate space for a command/data in the bin's data buffer. * Grow the block list if needed. */ static INLINE void * -lp_bin_alloc( struct data_block_list *list, unsigned size) +lp_bin_alloc( struct lp_bins *bins, unsigned size) { + struct data_block_list *list = &bins->data; + if (list->tail->used + size > DATA_BLOCK_SIZE) { lp_bin_new_data_block( list ); } @@ -114,9 +145,11 @@ lp_bin_alloc( struct data_block_list *list, unsigned size) * As above, but with specific alignment. */ static INLINE void * -lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, +lp_bin_alloc_aligned( struct lp_bins *bins, unsigned size, unsigned alignment ) { + struct data_block_list *list = &bins->data; + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { lp_bin_new_data_block( list ); } @@ -134,20 +167,32 @@ lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, /* Put back data if we decide not to use it, eg. culled triangles. */ static INLINE void -lp_bin_putback_data( struct data_block_list *list, unsigned size) +lp_bin_putback_data( struct lp_bins *bins, unsigned size) { + struct data_block_list *list = &bins->data; assert(list->tail->used >= size); list->tail->used -= size; } -/* Add a command to a given bin. +/** Return pointer to a particular tile's bin. 
*/ +static INLINE struct cmd_bin * +lp_get_bin(struct lp_bins *bins, unsigned x, unsigned y) +{ + return &bins->tile[x][y]; +} + + + +/* Add a command to bin[x][y]. */ static INLINE void -lp_bin_command( struct cmd_bin *bin, +lp_bin_command( struct lp_bins *bins, + unsigned x, unsigned y, lp_rast_cmd cmd, union lp_rast_cmd_arg arg ) { + struct cmd_bin *bin = lp_get_bin(bins, x, y); struct cmd_block_list *list = &bin->commands; if (list->tail->count == CMD_BLOCK_MAX) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a466aec379..87e3bfcd3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -56,7 +56,8 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. */ -boolean lp_rast_begin( struct lp_rasterizer *rast, +static boolean +lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, boolean write_color, @@ -121,7 +122,8 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, * Finish the rasterization phase. * Unmap framebuffer surfaces. */ -void lp_rast_end( struct lp_rasterizer *rast ) +static void +lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; @@ -469,12 +471,13 @@ lp_rast_end_tile( struct lp_rasterizer *rast ) /** * Rasterize commands for a single bin. + * \param x, y position of the bin's tile in the framebuffer * Must be called between lp_rast_begin() and lp_rast_end(). */ -void -lp_rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y) +static void +rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; @@ -493,6 +496,42 @@ lp_rasterize_bin( struct lp_rasterizer *rast, } +/** + * Rasterize/execute all bins. 
+ */ +void +lp_rasterize_bins( struct lp_rasterizer *rast, + struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + unsigned i, j; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + lp_rast_begin( rast, + fb->cbufs[0], + fb->zsbuf, + fb->cbufs[0] != NULL, + fb->zsbuf != NULL && write_depth, + fb->width, + fb->height ); + + /* loop over tile bins, rasterize each */ + for (i = 0; i < tiles_x; i++) { + for (j = 0; j < tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + + lp_rast_end( rast ); + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 3d2388b894..e623eafc9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -47,6 +47,7 @@ * individual function calls like this. 
*/ struct lp_rasterizer; +struct lp_bins; struct cmd_bin; struct pipe_screen; @@ -133,22 +134,12 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); void lp_rast_destroy( struct lp_rasterizer * ); +void lp_rasterize_bins( struct lp_rasterizer *rast, + struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y, + const struct pipe_framebuffer_state *fb, + bool write_depth ); -boolean lp_rast_begin( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - boolean write_color, - boolean write_zstencil, - unsigned width, - unsigned height ); - -void -lp_rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y); - - -void lp_rast_end( struct lp_rasterizer * ); union lp_rast_cmd_arg { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 47d2ac8e11..efaf5acfe8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -80,8 +80,6 @@ first_point( struct setup_context *setup, static void reset_context( struct setup_context *setup ) { - unsigned i, j; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* Reset derived state */ @@ -90,40 +88,7 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - /* Free all but last binner command lists: - */ - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = &setup->tile[i][j].commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; - } - } - - /* Free all but last binned data block: - */ - { - struct data_block_list *list = &setup->data; - struct data_block *block, *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - 
FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->used = 0; - } + lp_reset_bins(&setup->bins, setup->tiles_x, setup->tiles_y); /* Reset some state: */ @@ -177,7 +142,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - lp_bin_command( &setup->tile[i][j], cmd, arg ); + lp_bin_command( &setup->bins, i, j, cmd, arg ); } @@ -194,13 +159,13 @@ bin_state_command( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_bin *bin = &setup->tile[i][j]; + struct cmd_bin *bin = &setup->bins.tile[i][j]; lp_rast_cmd last_cmd = lp_get_last_command(bin); if (last_cmd == cmd) { lp_replace_last_command_arg(bin, arg); } else { - lp_bin_command( bin, cmd, arg ); + lp_bin_command( &setup->bins, i, j, cmd, arg ); } } } @@ -212,29 +177,10 @@ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { - struct lp_rasterizer *rast = setup->rast; - unsigned i, j; - - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - - lp_rast_begin( rast, - setup->fb->cbufs[0], - setup->fb->zsbuf, - setup->fb->cbufs[0] != NULL, - setup->fb->zsbuf != NULL && write_depth, - setup->fb->width, - setup->fb->height ); - - /* loop over tile bins, rasterize each */ - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - lp_rasterize_bin( rast, &setup->tile[i][j], - i * TILE_SIZE, - j * TILE_SIZE ); - } - } - - lp_rast_end( rast ); + lp_rasterize_bins(setup->rast, + &setup->bins, setup->tiles_x, setup->tiles_y, + setup->fb, + write_depth); reset_context( setup ); @@ -559,7 +505,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(&setup->data, 4 * 16, 16); + stored = lp_bin_alloc_aligned(&setup->bins, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ 
for (i = 0; i < 4; ++i) { @@ -591,7 +537,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(&setup->data, current_size); + stored = lp_bin_alloc(&setup->bins, current_size); if(stored) { memcpy(stored, current_data, @@ -621,7 +567,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(&setup->data, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(&setup->bins, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -677,17 +623,11 @@ lp_setup_tri(struct setup_context *setup, void lp_setup_destroy( struct setup_context *setup ) { - unsigned i, j; - reset_context( setup ); pipe_buffer_reference(&setup->constants.current, NULL); - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) - FREE(setup->tile[i][j].commands.head); - - FREE(setup->data.head); + lp_free_bin_data(&setup->bins); lp_rast_destroy( setup->rast ); FREE( setup ); @@ -702,19 +642,12 @@ struct setup_context * lp_setup_create( struct pipe_screen *screen ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); - unsigned i, j; setup->rast = lp_rast_create( screen ); if (!setup->rast) goto fail; - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].commands.head = - setup->tile[i][j].commands.tail = CALLOC_STRUCT(cmd_block); - - setup->data.head = - setup->data.tail = CALLOC_STRUCT(data_block); + lp_init_bins(&setup->bins); setup->triangle = first_triangle; setup->line = first_line; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index dc12eb7847..8478bb9014 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -40,14 +40,6 @@ #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_bin.h" -/* We're limited to 2K 
by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. - */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) - #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -63,14 +55,7 @@ struct setup_context { struct lp_rasterizer *rast; - /** - * Per-bin data goes into the 'tile' bins. - * Shared bin data goes into the 'data' buffer. - * When there are multiple threads, will want to double-buffer the - * bin arrays: - */ - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; + struct lp_bins bins; /* size of framebuffer, in tiles */ unsigned tiles_x; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5e53b4050e..b8f79849e8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -185,9 +185,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +263,7 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = lp_bin_alloc( &setup->data, sizeof *tri ); + struct lp_rast_triangle *tri = lp_bin_alloc( &setup->bins, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +283,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject 
to overflow?? */ if (area <= 0) { - lp_bin_putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->bins, sizeof *tri ); return; } @@ -295,7 +295,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->bins, sizeof *tri ); return; } @@ -405,7 +405,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_bin_command( &setup->bins, minx, miny, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,17 +464,17 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( &setup->tile[x][y], - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); + lp_bin_command( &setup->bins, x, y, + lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { in = 1; /* shade partial tile */ - lp_bin_command( &setup->tile[x][y], - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_bin_command( &setup->bins, x, y, + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: -- cgit v1.2.3 From 2c8d5c66ce2ddc0b7182e4844690736fc4c47212 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:46:37 -0700 Subject: llvmpipe: remove dead code left over from a previous commit --- src/gallium/drivers/llvmpipe/lp_setup.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index efaf5acfe8..e561e8e9b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -284,39 +284,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); -#if 0 - pipe_surface_reference( &setup->fb.cbuf, color ); - pipe_surface_reference( 
&setup->fb.zsbuf, zstencil ); - - if (!setup->fb.cbuf && !setup->fb.zsbuf) { - setup->fb.width = 0; - setup->fb.height = 0; - } - else if (!setup->fb.zsbuf) { - setup->fb.width = setup->fb.cbuf->width; - setup->fb.height = setup->fb.cbuf->height; - } - else if (!setup->fb.cbuf) { - setup->fb.width = setup->fb.zsbuf->width; - setup->fb.height = setup->fb.zsbuf->height; - } - else { - /* XXX: not sure what we're really supposed to do for - * mis-matched color & depth buffer sizes. - */ - setup->fb.width = MIN2(setup->fb.cbuf->width, - setup->fb.zsbuf->width); - setup->fb.height = MIN2(setup->fb.cbuf->height, - setup->fb.zsbuf->height); - } - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; -#else setup->fb = fb; setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; -#endif - } -- cgit v1.2.3 From 8a23105fa016ec4368f407ca64e7763f110da4e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:59:25 -0700 Subject: llvmpipe: still more bin code reorganization Move tiles_x,y fields from setup state into bin state. Move more bin-adding commands into lp_bin.[ch]. 
--- src/gallium/drivers/llvmpipe/lp_bin.c | 70 +++++++++++++- src/gallium/drivers/llvmpipe/lp_bin.h | 32 ++++++- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +- src/gallium/drivers/llvmpipe/lp_rast.h | 1 - src/gallium/drivers/llvmpipe/lp_setup.c | 120 ++++++------------------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 - 6 files changed, 130 insertions(+), 102 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index 1f05416b3e..160a8d865b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -45,14 +45,14 @@ lp_init_bins(struct lp_bins *bins) void -lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y) +lp_reset_bins(struct lp_bins *bins ) { unsigned i, j; /* Free all but last binner command lists: */ - for (i = 0; i < tiles_x; i++) { - for (j = 0; j < tiles_y; j++) { + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); struct cmd_block_list *list = &bin->commands; struct cmd_block *block; @@ -107,6 +107,14 @@ lp_free_bin_data(struct lp_bins *bins) } +void +lp_bin_set_num_bins( struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y ) +{ + bins->tiles_x = tiles_x; + bins->tiles_y = tiles_y; +} + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { @@ -127,3 +135,59 @@ lp_bin_new_data_block( struct data_block_list *list ) block->next = NULL; block->used = 0; } + + +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. 
+ */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + + + +/** + * Put a state-change command into all bins. + * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. + */ +void +lp_bin_state_command( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + lp_bin_command( bins, i, j, cmd, arg ); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 4d12b93274..fcbb975ad6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -104,16 +104,26 @@ struct data_block_list { struct lp_bins { struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; + + /** + * Number of active tiles in each dimension. + * This is basically the framebuffer size divided by tile size + */ + unsigned tiles_x, tiles_y; }; void lp_init_bins(struct lp_bins *bins); -void lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y); +void lp_reset_bins(struct lp_bins *bins ); void lp_free_bin_data(struct lp_bins *bins); +void +lp_bin_set_num_bins( struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y ); + void lp_bin_new_data_block( struct data_block_list *list ); void lp_bin_new_cmd_block( struct cmd_block_list *list ); @@ -209,4 +219,24 @@ lp_bin_command( struct lp_bins *bins, } +/* Add a command to all active bins.
+ */ +static INLINE void +lp_bin_everywhere( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) + for (j = 0; j < bins->tiles_y; j++) + lp_bin_command( bins, i, j, cmd, arg ); +} + + +void +lp_bin_state_command( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ); + + #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 87e3bfcd3f..642f1b9079 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -502,7 +502,6 @@ rasterize_bin( struct lp_rasterizer *rast, void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y, const struct pipe_framebuffer_state *fb, bool write_depth ) { @@ -519,8 +518,8 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->height ); /* loop over tile bins, rasterize each */ - for (i = 0; i < tiles_x; i++) { - for (j = 0; j < tiles_y; j++) { + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e623eafc9a..e77c77b776 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -136,7 +136,6 @@ void lp_rast_destroy( struct lp_rasterizer * ); void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y, const struct pipe_framebuffer_state *fb, bool write_depth ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e561e8e9b6..4935d5b540 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -88,7 +88,7 @@ static void reset_context( struct setup_context *setup ) 
setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins(&setup->bins, setup->tiles_x, setup->tiles_y); + lp_reset_bins( &setup->bins ); /* Reset some state: */ @@ -103,82 +103,13 @@ static void reset_context( struct setup_context *setup ) } -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. - */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/* Add a command to all active bins. - */ -static void bin_everywhere( struct setup_context *setup, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < setup->tiles_x; i++) - for (j = 0; j < setup->tiles_y; j++) - lp_bin_command( &setup->bins, i, j, cmd, arg ); -} - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. 
- */ -static void -bin_state_command( struct setup_context *setup, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - struct cmd_bin *bin = &setup->bins.tile[i][j]; - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_bin_command( &setup->bins, i, j, cmd, arg ); - } - } - } -} - - /** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { lp_rasterize_bins(setup->rast, - &setup->bins, setup->tiles_x, setup->tiles_y, + &setup->bins, setup->fb, write_depth); @@ -196,20 +127,24 @@ begin_binning( struct setup_context *setup ) if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - bin_everywhere( setup, - lp_rast_clear_color, - setup->clear.color ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_color, + setup->clear.color ); else - bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); + lp_bin_everywhere( &setup->bins, + lp_rast_load_color, + lp_rast_arg_null() ); } if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - bin_everywhere( setup, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_zstencil, + setup->clear.zstencil ); else - bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); + lp_bin_everywhere( &setup->bins, + lp_rast_load_zstencil, + lp_rast_arg_null() ); } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); @@ -280,13 +215,18 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { + unsigned tiles_x, tiles_y; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); setup->fb = fb; - setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + + tiles_x 
= align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + + lp_bin_set_num_bins(&setup->bins, tiles_x, tiles_y); } @@ -321,14 +261,14 @@ lp_setup_clear( struct setup_context *setup, * don't see that as being a common usage. */ if (flags & PIPE_CLEAR_COLOR) - bin_everywhere( setup, - lp_rast_clear_color, - setup->clear.color ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_color, + setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - bin_everywhere( setup, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -545,9 +485,9 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - bin_state_command( setup, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); + lp_bin_state_command( &setup->bins, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 8478bb9014..9b47b595c6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -57,10 +57,6 @@ struct setup_context { struct lp_bins bins; - /* size of framebuffer, in tiles */ - unsigned tiles_x; - unsigned tiles_y; - boolean ccw_is_frontface; unsigned cullmode; -- cgit v1.2.3 From 270f15486072b0a2fbea2a21b7a4a9d4c76d4bfb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:04:31 -0700 Subject: llvmpipe: introduce mutex and bin iteration functions --- src/gallium/drivers/llvmpipe/lp_bin.c | 68 +++++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 11 ++++++ 2 files changed, 79 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c 
b/src/gallium/drivers/llvmpipe/lp_bin.c index 160a8d865b..3e294e5799 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -41,9 +41,14 @@ lp_init_bins(struct lp_bins *bins) bins->data.head = bins->data.tail = CALLOC_STRUCT(data_block); + + pipe_mutex_init(bins->mutex); } +/** + * Set bins to empty state. + */ void lp_reset_bins(struct lp_bins *bins ) { @@ -87,6 +92,9 @@ lp_reset_bins(struct lp_bins *bins ) } +/** + * Free all data associated with the given bin, but don't free(bins). + */ void lp_free_bin_data(struct lp_bins *bins) { @@ -104,6 +112,8 @@ lp_free_bin_data(struct lp_bins *bins) FREE(bins->data.head); bins->data.head = NULL; + + pipe_mutex_destroy(bins->mutex); } @@ -191,3 +201,61 @@ lp_bin_state_command( struct lp_bins *bins, } } } + + +/** advance curr_x,y to the next bin */ +static boolean +next_bin(struct lp_bins *bins) +{ + bins->curr_x++; + if (bins->curr_x >= bins->tiles_x) { + bins->curr_x = 0; + bins->curr_y++; + } + if (bins->curr_y >= bins->tiles_y) { + /* no more bins */ + return FALSE; + } + return TRUE; +} + + +void +lp_bin_iter_begin( struct lp_bins *bins ) +{ + bins->curr_x = bins->curr_y = -1; +} + + +/** + * Return pointer to next bin to be rendered. + * The lp_bins::curr_x and ::curr_y fields will be advanced. + * Multiple rendering threads will call this function to get a chunk + * of work (a bin) to work on.
+ */ +struct cmd_bin * +lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ) +{ + struct cmd_bin *bin = NULL; + + pipe_mutex_lock(bins->mutex); + + if (bins->curr_x < 0) { + /* first bin */ + bins->curr_x = 0; + bins->curr_y = 0; + } + else if (!next_bin(bins)) { + /* no more bins left */ + goto end; + } + + bin = lp_get_bin(bins, bins->curr_x, bins->curr_y); + *bin_x = bins->curr_x; + *bin_y = bins->curr_y; + +end: + /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ + pipe_mutex_unlock(bins->mutex); + return bin; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index fcbb975ad6..24e599ea66 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -35,6 +35,7 @@ #ifndef LP_BIN_H #define LP_BIN_H +#include "pipe/p_thread.h" #include "lp_tile_soa.h" #include "lp_rast.h" @@ -110,6 +111,9 @@ struct lp_bins { * This basically the framebuffer size divided by tile size */ unsigned tiles_x, tiles_y; + + int curr_x, curr_y; /**< for iterating over bins */ + pipe_mutex mutex; }; @@ -239,4 +243,11 @@ lp_bin_state_command( struct lp_bins *bins, const union lp_rast_cmd_arg arg ); +void +lp_bin_iter_begin( struct lp_bins *bins ); + +struct cmd_bin * +lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ); + + #endif /* LP_BIN_H */ -- cgit v1.2.3 From cdaea049c95031338040b31ff31944c8a001a1dd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 15:31:50 -0700 Subject: llvmpipe: use bin iteration functions when rasterizing bins --- src/gallium/drivers/llvmpipe/lp_rast.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 642f1b9079..a6192e589d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -505,8 +505,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, const struct 
pipe_framebuffer_state *fb, bool write_depth ) { - unsigned i, j; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, @@ -518,12 +516,28 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->height ); /* loop over tile bins, rasterize each */ - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); +#if 0 + { + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + } } } +#else + { + struct cmd_bin *bin; + int x, y; + + lp_bin_iter_begin( bins ); + + while ((bin = lp_bin_iter_next(bins, &x, &y))) { + rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif lp_rast_end( rast ); -- cgit v1.2.3 From 3a06c113c76355fc9622adfe7565c18d9787e9a8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 17:02:17 -0700 Subject: llvmpipe: repartition lp_rasterizer state for threading Some of the state is per-thread. Put that state in new lp_rasterizer_task struct. 
--- src/gallium/drivers/llvmpipe/lp_bin.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast.c | 89 +++++++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 7 +++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43 +++++++++----- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 62 ++++++++++---------- 5 files changed, 133 insertions(+), 72 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 24e599ea66..b07ff64e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -56,7 +56,9 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); struct cmd_block { lp_rast_cmd cmd[CMD_BLOCK_MAX]; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a6192e589d..37cc28e938 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -39,14 +39,18 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { struct lp_rasterizer *rast; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) return NULL; rast->screen = screen; - rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + } return rast; } @@ -153,12 +157,13 @@ lp_rast_end( struct lp_rasterizer *rast ) */ static void lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned thread_index, unsigned x, unsigned y ) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->x = x; - rast->y = y; + rast->tasks[thread_index].x = x; + rast->tasks[thread_index].y = y; } @@ 
-167,9 +172,11 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const uint8_t *clear_color = arg.clear_color; + uint8_t *color_tile = rast->tasks[thread_index].tile.color; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], @@ -180,14 +187,14 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { - memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } else { unsigned x, y, chan; for (y = 0; y < TILE_SIZE; y++) for (x = 0; x < TILE_SIZE; x++) for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan]; + TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; } } @@ -197,15 +204,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { unsigned i, j; + uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; + depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; } @@ -214,6 +223,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_load_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -227,6 +237,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_load_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -236,6 +247,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_state *state = arg.set_state; @@ -243,7 +255,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->current_state = state; + rast->tasks[thread_index].current_state = state; } @@ -257,9 +269,12 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_shade_tile( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const unsigned tile_x = rast->tasks[thread_index].x; + const unsigned tile_y = rast->tasks[thread_index].y; const unsigned mask = ~0; unsigned x, y; @@ -269,7 +284,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); + lp_rast_shade_quads( rast, + thread_index, + inputs, + tile_x + x, + tile_y + y, + mask); } @@ -278,13 +298,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned mask) { #if 1 - const struct lp_rast_state *state = rast->current_state; - struct lp_rast_tile *tile = &rast->tile; + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; void *color; void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; @@ -388,10 +409,11 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /** * Write the rasterizer's color tile to the framebuffer. */ -static void lp_rast_store_color( struct lp_rasterizer *rast ) +static void lp_rast_store_color( struct lp_rasterizer *rast, + unsigned thread_index) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -404,7 +426,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, - rast->tile.color, + rast->tasks[thread_index].tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, x, y, @@ -430,10 +452,11 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, /** * Write the rasterizer's z/stencil tile to the framebuffer. 
*/ -static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +static void lp_rast_store_zstencil( struct lp_rasterizer *rast, + unsigned thread_index ) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -446,7 +469,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(rast->tile.depth, + lp_tile_write_z32(rast->tasks[thread_index].tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -457,15 +480,16 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) * Write the rasterizer's tiles to the framebuffer. */ static void -lp_rast_end_tile( struct lp_rasterizer *rast ) +lp_rast_end_tile( struct lp_rasterizer *rast, + unsigned thread_index ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) - lp_rast_store_color(rast); + lp_rast_store_color(rast, thread_index); if (rast->state.write_zstencil) - lp_rast_store_zstencil(rast); + lp_rast_store_zstencil(rast, thread_index); } @@ -476,6 +500,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast ) */ static void rasterize_bin( struct lp_rasterizer *rast, + unsigned thread_index, const struct cmd_bin *bin, int x, int y) { @@ -483,16 +508,16 @@ rasterize_bin( struct lp_rasterizer *rast, struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, x, y ); + lp_rast_start_tile( rast, thread_index, x, y ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); + block->cmd[k]( rast, 0, block->arg[k] ); } } - lp_rast_end_tile( rast ); + lp_rast_end_tile( rast, thread_index ); } @@ -522,7 +547,7 @@ 
lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < bins->tiles_x; i++) { for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE ); } } } @@ -534,7 +559,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bin_iter_begin( bins ); while ((bin = lp_bin_iter_next(bins, &x, &y))) { - rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif @@ -550,10 +575,16 @@ lp_rasterize_bins( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + unsigned i; + pipe_surface_reference(&rast->state.cbuf, NULL); pipe_surface_reference(&rast->state.zsbuf, NULL); - align_free(rast->tile.depth); - align_free(rast->tile.color); + + for (i = 0; i < Elements(rast->tasks); i++) { + align_free(rast->tasks[i].tile.depth); + align_free(rast->tasks[i].tile.color); + } + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e77c77b776..25e7f8e008 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -193,24 +193,31 @@ lp_rast_arg_null( void ) */ void lp_rast_clear_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_clear_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_set_state( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_triangle( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer *, + unsigned 
thread_index, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 98111edff7..9e7cbd7912 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,10 @@ #include "lp_rast.h" + +#define MAX_THREADS 4 /* XXX probably temporary here */ + + struct pipe_transfer; struct pipe_screen; @@ -47,14 +51,34 @@ struct lp_rast_tile /** - * This is the state required while rasterizing a tile. - * The tile size is TILE_SIZE x TILE_SIZE pixels. + * Per-thread rasterization state */ -struct lp_rasterizer +struct lp_rasterizer_task { struct lp_rast_tile tile; /** Tile color/z/stencil memory */ unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + + /* Pixel blocks produced during rasterization + */ + unsigned nr_blocks; + struct { + unsigned x; + unsigned y; + unsigned mask; + } blocks[256]; + + const struct lp_rast_state *current_state; +}; + + +/** + * This is the state required while rasterizing tiles. + * Note that this contains per-thread information too. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ unsigned width, height; /**< Size of framebuffer, in pixels */ boolean clipped_tile; @@ -78,20 +102,13 @@ struct lp_rasterizer char clear_stencil; } state; - /* Pixel blocks produced during rasterization - */ - unsigned nr_blocks; - struct { - unsigned x; - unsigned y; - unsigned mask; - } blocks[256]; - - const struct lp_rast_state *current_state; + /** A task object for each rasterization thread */ + struct lp_rasterizer_task tasks[MAX_THREADS]; }; void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned masks); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 81a9c1c142..6c96010c52 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,15 +40,15 @@ * All pixels are known to be inside the triangle's bounds. */ static void -block_full_4( struct lp_rasterizer *rast, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { - const unsigned i = rast->nr_blocks; + const unsigned i = rast_task->nr_blocks; assert(x % 4 == 0); assert(y % 4 == 0); - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = ~0; - rast->nr_blocks++; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = ~0; + rast_task->nr_blocks++; } @@ -57,14 +57,14 @@ block_full_4( struct lp_rasterizer *rast, int x, int y ) * All pixels are known to be inside the triangle's bounds. 
*/ static void -block_full_16( struct lp_rasterizer *rast, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast, x + ix, y + iy); + block_full_4(rast_task, x + ix, y + iy); } @@ -74,7 +74,7 @@ block_full_16( struct lp_rasterizer *rast, int x, int y ) * Generate a mask of in/out flags and add the block to the blocks list. */ static void -do_block_4( struct lp_rasterizer *rast, +do_block_4( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -97,11 +97,11 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { - const unsigned i = rast->nr_blocks; - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = mask; - rast->nr_blocks++; + const unsigned i = rast_task->nr_blocks; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = mask; + rast_task->nr_blocks++; } } @@ -111,7 +111,7 @@ do_block_4( struct lp_rasterizer *rast, * of the triangle's bounds. 
*/ static void -do_block_16( struct lp_rasterizer *rast, +do_block_16( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -146,11 +146,11 @@ do_block_16( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_4(rast, x+ix, y+iy); + block_full_4(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } @@ -163,12 +163,14 @@ do_block_16( struct lp_rasterizer *rast, */ void lp_rast_triangle( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { + struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; - int x = rast->x; - int y = rast->y; + int x = rast_task->x; + int y = rast_task->y; int ix, iy; unsigned i = 0; @@ -184,11 +186,11 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; - assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); + assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); - rast->nr_blocks = 0; + rast_task->nr_blocks = 0; /* Walk over the tile to build a list of 4x4 pixel blocks which will * be filled/shaded. 
We do this at two granularities: 16x16 blocks @@ -209,21 +211,23 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_16(rast, x+ix, y+iy); + block_full_16(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } - assert(rast->nr_blocks <= Elements(rast->blocks)); + assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); /* Shade the 4x4 pixel blocks */ - for (i = 0; i < rast->nr_blocks; i++) - lp_rast_shade_quads(rast, &tri->inputs, - rast->blocks[i].x, - rast->blocks[i].y, - rast->blocks[i].mask); + for (i = 0; i < rast_task->nr_blocks; i++) + lp_rast_shade_quads(rast, + thread_index, + &tri->inputs, + rast_task->blocks[i].x, + rast_task->blocks[i].y, + rast_task->blocks[i].mask); } -- cgit v1.2.3 From 87c9ceaea2138e051c48cd8c0fbf5f6658100779 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 17:58:46 -0700 Subject: gallium: added pipe_semaphore and related code --- src/gallium/include/pipe/p_thread.h | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 25e4148232..45c35a87d0 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -207,6 +207,56 @@ typedef unsigned pipe_condvar; #endif /* PIPE_OS_? 
*/ +/* + * Semaphores + */ + +typedef struct +{ + pipe_mutex mutex; + pipe_condvar cond; + int counter; +} pipe_semaphore; + + +static INLINE void +pipe_semaphore_init(pipe_semaphore *sema, int init_val) +{ + pipe_mutex_init(sema->mutex); + pipe_condvar_init(sema->cond); + sema->counter = init_val; +} + +static INLINE void +pipe_semaphore_destroy(pipe_semaphore *sema) +{ + pipe_mutex_destroy(sema->mutex); + pipe_condvar_destroy(sema->cond); +} + +/** Signal/increment semaphore counter */ +static INLINE void +pipe_semaphore_signal(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + sema->counter++; + pipe_condvar_signal(sema->cond); + pipe_mutex_unlock(sema->mutex); +} + +/** Wait for semaphore counter to be greater than zero */ +static INLINE void +pipe_semaphore_wait(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + while (sema->counter <= 0) { + pipe_condvar_wait(sema->cond, sema->mutex); + } + sema->counter--; + pipe_mutex_unlock(sema->mutex); +} + + /* * Thread-specific data. -- cgit v1.2.3 From aab1ceceecbd6449eebce7f5f5b356b1a51552e7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:01:12 -0700 Subject: llvmpipe: implement threaded rasterization The LP_NUM_THREADS env var controls how many threads are created. The default (and max) is 4, for now. If LP_NUM_THREADS = 0, threading is not used. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 195 ++++++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 18 +++ 2 files changed, 170 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 37cc28e938..99f7108b42 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_memory.h" +#include "util/u_math.h" #include "lp_debug.h" #include "lp_state.h" @@ -36,25 +37,6 @@ #include "lp_bin.h" -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) -{ - struct lp_rasterizer *rast; - unsigned i; - - rast = CALLOC_STRUCT(lp_rasterizer); - if(!rast) - return NULL; - - rast->screen = screen; - - for (i = 0; i < Elements(rast->tasks); i++) { - rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - } - - return rast; -} - /** * Begin the rasterization phase. 
@@ -414,16 +396,25 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, { const unsigned x = rast->tasks[thread_index].x; const unsigned y = rast->tasks[thread_index].y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; + int w = TILE_SIZE; + int h = TILE_SIZE; if (x + w > rast->width) w -= x + w - rast->width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->height) { + int h2; + h2 = h - (y + h - rast->height); + assert(h2 <= TILE_SIZE); + h = h2; + } + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); - LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tasks[thread_index].tile.color, @@ -513,7 +504,7 @@ rasterize_bin( struct lp_rasterizer *rast, /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, 0, block->arg[k] ); + block->cmd[k]( rast, thread_index, block->arg[k] ); } } @@ -524,6 +515,41 @@ rasterize_bin( struct lp_rasterizer *rast, /** * Rasterize/execute all bins. */ +static void +rasterize_bins( struct lp_rasterizer *rast, + unsigned thread_index, + struct lp_bins *bins, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + /* loop over tile bins, rasterize each */ +#if 0 + { + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, thread_index, + bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + } +#else + { + struct cmd_bin *bin; + int x, y; + + while ((bin = lp_bin_iter_next(bins, &x, &y))) { + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif +} + + +/** + * Called by rasterizer when it has something for us to render. 
+ */ void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, @@ -539,30 +565,32 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->zsbuf != NULL && write_depth, fb->width, fb->height ); - - /* loop over tile bins, rasterize each */ -#if 0 - { - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE ); - } - } + + if (rast->num_threads == 0) { + /* no threading */ + lp_bin_iter_begin( bins ); + rasterize_bins( rast, 0, bins, fb, write_depth ); } -#else - { - struct cmd_bin *bin; - int x, y; + else { + /* threaded rendering! */ + unsigned i; + + rast->bins = bins; + rast->fb = fb; + rast->write_depth = write_depth; lp_bin_iter_begin( bins ); - while ((bin = lp_bin_iter_next(bins, &x, &y))) { - rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE); + /* signal the threads that there's work to do */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* wait for work to complete */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_wait(&rast->tasks[i].work_done); } } -#endif lp_rast_end( rast ); @@ -570,6 +598,87 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } +/** + * This is the thread's main entrypoint. + * It's a simple loop: + * 1. wait for work + * 2. do work + * 3. 
signal that we're done + */ +static void * +thread_func( void *init_data ) +{ + struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; + struct lp_rasterizer *rast = task->rast; + int debug = 0; + + while (1) { + /* wait for work */ + if (debug) + debug_printf("thread %d waiting for work\n", task->thread_index); + pipe_semaphore_wait(&task->work_ready); + + /* do work */ + if (debug) + debug_printf("thread %d doing work\n", task->thread_index); + rasterize_bins(rast, task->thread_index, + rast->bins, rast->fb, rast->write_depth); + + /* signal done with work */ + if (debug) + debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); + } + + return NULL; +} + + +/** + * Initialize semaphores and spawn the threads. + */ +static void +create_rast_threads(struct lp_rasterizer *rast) +{ + unsigned i; + + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", MAX_THREADS); + rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); + + /* NOTE: if num_threads is zero, we won't use any threads */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_init(&rast->tasks[i].work_ready, 0); + pipe_semaphore_init(&rast->tasks[i].work_done, 0); + rast->threads[i] = pipe_thread_create(thread_func, + (void *) &rast->tasks[i]); + } +} + + + +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) +{ + struct lp_rasterizer *rast; + unsigned i; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->screen = screen; + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].rast = rast; + rast->tasks[i].thread_index = i; + } + + create_rast_threads(rast); + + return rast; +} + /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 9e7cbd7912..62f3c877da 
100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -28,6 +28,7 @@ #ifndef LP_RAST_PRIV_H #define LP_RAST_PRIV_H +#include "pipe/p_thread.h" #include "lp_rast.h" @@ -36,6 +37,7 @@ struct pipe_transfer; struct pipe_screen; +struct lp_rasterizer; /** @@ -69,6 +71,15 @@ struct lp_rasterizer_task } blocks[256]; const struct lp_rast_state *current_state; + + /** "back" pointer */ + struct lp_rasterizer *rast; + + /** "my" index */ + unsigned thread_index; + + pipe_semaphore work_ready; + pipe_semaphore work_done; }; @@ -104,6 +115,13 @@ struct lp_rasterizer /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; + + unsigned num_threads; + pipe_thread threads[MAX_THREADS]; + + struct lp_bins *bins; + const struct pipe_framebuffer_state *fb; + boolean write_depth; }; -- cgit v1.2.3 From 73e13c33fd0a9b8574d00d01d301b9d4f88d4051 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:18:37 -0700 Subject: llvmpipe: remove some left-over debug code --- src/gallium/drivers/llvmpipe/lp_rast.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 99f7108b42..01685b79d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -402,12 +402,9 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, if (x + w > rast->width) w -= x + w - rast->width; - if (y + h > rast->height) { - int h2; - h2 = h - (y + h - rast->height); - assert(h2 <= TILE_SIZE); - h = h2; - } + if (y + h > rast->height) + h -= y + h - rast->height; + assert(w >= 0); assert(h >= 0); assert(w <= TILE_SIZE); -- cgit v1.2.3 From 7f457acabcbeea6a27b4f375f55e318fff52445f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 8 Dec 2009 08:02:49 +0000 Subject: llvmpipe: Use number of CPUs as default number of threads. 
Also bump MAX_THREADS to 8. --- src/gallium/drivers/llvmpipe/lp_rast.c | 4 +++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 01685b79d8..7cd046cc39 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_debug.h" #include "lp_state.h" @@ -639,7 +640,8 @@ create_rast_threads(struct lp_rasterizer *rast) { unsigned i; - rast->num_threads = debug_get_num_option("LP_NUM_THREADS", MAX_THREADS); + rast->num_threads = util_cpu_caps.nr_cpus; + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); /* NOTE: if num_threads is zero, we won't use any threads */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 62f3c877da..5502419a92 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -32,7 +32,7 @@ #include "lp_rast.h" -#define MAX_THREADS 4 /* XXX probably temporary here */ +#define MAX_THREADS 8 /* XXX probably temporary here */ struct pipe_transfer; -- cgit v1.2.3 From 88e62b33dc5ed4a4ab0c668e627c7e85991c74a1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 12:22:12 -0700 Subject: llvmpipe: more bin functions for create/destroy/queries --- src/gallium/drivers/llvmpipe/lp_bin.c | 47 +++++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 9 +++++++ 2 files changed, 56 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index 3e294e5799..f2d3c2df4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -29,6 +29,25 @@ #include "lp_bin.h" +struct 
lp_bins * +lp_bins_create(void) +{ + struct lp_bins *bins = CALLOC_STRUCT(lp_bins); + if (bins) + lp_init_bins(bins); + return bins; +} + + +void +lp_bins_destroy(struct lp_bins *bins) +{ + lp_reset_bins(bins); + lp_free_bin_data(bins); + FREE(bins); +} + + void lp_init_bins(struct lp_bins *bins) { @@ -147,6 +166,34 @@ lp_bin_new_data_block( struct data_block_list *list ) } +/** Return number of bytes used for bin data */ +unsigned +lp_bin_data_size( const struct lp_bins *bins ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = bins->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + +/** Return number of bytes used for a tile bin */ +unsigned +lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_get_bin((struct lp_bins *) bins, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->commands.head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + /** * Return last command in the bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index b07ff64e62..c49b0264d6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -120,6 +120,11 @@ struct lp_bins { +struct lp_bins *lp_bins_create(void); + +void lp_bins_destroy(struct lp_bins *bins); + + void lp_init_bins(struct lp_bins *bins); void lp_reset_bins(struct lp_bins *bins ); @@ -134,6 +139,10 @@ void lp_bin_new_data_block( struct data_block_list *list ); void lp_bin_new_cmd_block( struct cmd_block_list *list ); +unsigned lp_bin_data_size( const struct lp_bins *bins ); + +unsigned lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ); + /** * Allocate space for a command/data in the bin's data buffer. 
-- cgit v1.2.3 From 22b07b8be4c2939b00e10f17fa91e68682808594 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 12:28:54 -0700 Subject: llvmpipe: use new lp_setup_get_current_bins() function This stub function will interface to the queue system... --- src/gallium/drivers/llvmpipe/lp_setup.c | 46 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 3 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++----- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4935d5b540..484a609e6e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -47,6 +47,13 @@ static void set_state( struct setup_context *, unsigned ); +struct lp_bins * +lp_setup_get_current_bins(struct setup_context *setup) +{ + /* XXX eventually get bin from queue */ + return setup->bins; +} + static void first_triangle( struct setup_context *setup, @@ -88,7 +95,7 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins( &setup->bins ); + lp_reset_bins( setup->bins ); /* Reset some state: */ @@ -108,8 +115,10 @@ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + lp_rasterize_bins(setup->rast, - &setup->bins, + bins, setup->fb, write_depth); @@ -123,26 +132,28 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_color, setup->clear.color ); else - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_load_color, lp_rast_arg_null() ); } if (setup->fb->zsbuf) { 
if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_zstencil, setup->clear.zstencil ); else - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_load_zstencil, lp_rast_arg_null() ); } @@ -215,6 +226,7 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned tiles_x, tiles_y; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -226,7 +238,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - lp_bin_set_num_bins(&setup->bins, tiles_x, tiles_y); + lp_bin_set_num_bins(bins, tiles_x, tiles_y); } @@ -237,6 +249,7 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned i; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); @@ -261,12 +274,12 @@ lp_setup_clear( struct setup_context *setup, * don't see that as being a common usage. 
*/ if (flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_color, setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_zstencil, setup->clear.zstencil ); } @@ -407,6 +420,8 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(setup->fs.current.jit_function); @@ -415,7 +430,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(&setup->bins, 4 * 16, 16); + stored = lp_bin_alloc_aligned(bins, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -447,7 +462,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(&setup->bins, current_size); + stored = lp_bin_alloc(bins, current_size); if(stored) { memcpy(stored, current_data, @@ -477,7 +492,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. 
*/ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(&setup->bins, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(bins, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -485,7 +500,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - lp_bin_state_command( &setup->bins, + lp_bin_state_command( bins, lp_rast_set_state, lp_rast_arg_state(setup->fs.stored) ); } @@ -537,9 +552,10 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - lp_free_bin_data(&setup->bins); + lp_bins_destroy(setup->bins); lp_rast_destroy( setup->rast ); + FREE( setup ); } @@ -557,7 +573,7 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; - lp_init_bins(&setup->bins); + setup->bins = lp_bins_create(); setup->triangle = first_triangle; setup->line = first_line; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 9b47b595c6..782c05122c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -55,7 +55,7 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins bins; + struct lp_bins *bins; boolean ccw_is_frontface; unsigned cullmode; @@ -113,5 +113,6 @@ void lp_setup_choose_triangle( struct setup_context *setup ); void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); +struct lp_bins *lp_setup_get_current_bins(struct setup_context *setup); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b8f79849e8..80617120b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -178,6 +178,7 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], 
boolean frontface) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned slot; /* Allocate space for the a0, dadx and dady arrays @@ -185,9 +186,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( bins, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +264,8 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = lp_bin_alloc( &setup->bins, sizeof *tri ); + struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_rast_triangle *tri = lp_bin_alloc( bins, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - lp_bin_putback_data( &setup->bins, sizeof *tri ); + lp_bin_putback_data( bins, sizeof *tri ); return; } @@ -295,7 +297,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( &setup->bins, sizeof *tri ); + lp_bin_putback_data( bins, sizeof *tri ); return; } @@ -405,7 +407,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( &setup->bins, minx, miny, lp_rast_triangle, + lp_bin_command( bins, minx, miny, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,7 +466,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( &setup->bins, x, y, + lp_bin_command( bins, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } @@ -472,7 +474,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* shade partial tile */ - lp_bin_command( &setup->bins, x, y, + lp_bin_command( bins, x, y, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From ea35993e7479793212529b1db081c84aa71ea4cc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 14:36:52 -0700 Subject: llvmpipe: added new lp_bin_queue.[ch] files The queues will be used for keeping track of full and empty bins so we can overlap setup with the rasterization threads. 
--- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bin_queue.c | 156 ++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin_queue.h | 55 ++++++++++ 4 files changed, 213 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.h diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6ff45d0f05..4cc4c88ffd 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -7,6 +7,7 @@ CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ lp_bin.c \ + lp_bin_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 4aef338735..19ef686167 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -20,6 +20,7 @@ llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ 'lp_bin.c', + 'lp_bin_queue.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c new file mode 100644 index 0000000000..19e1a5827b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -0,0 +1,156 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Bin queue. We'll use two queues. One contains "full" bins which + * are produced by the "setup" code. The other contains "empty" bins + * which are produced by the "rast" code when it finishes rendering a bin. + */ + + +#include "pipe/p_thread.h" +#include "lp_bin.h" +#include "lp_bin_queue.h" + + + +#define MAX_BINS 4 + + +/** + * A queue of bins + */ +struct lp_bins_queue +{ + /** XXX might use a linked list here someday, but the list will
+ */ + struct lp_bins *bins[MAX_BINS]; + unsigned size; + + pipe_condvar size_change; + pipe_mutex mutex; +}; + + + +/** Allocate a new bins queue */ +struct lp_bins_queue * +lp_bins_queue_create(void) +{ + struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); + if (queue) { + pipe_condvar_init(queue->size_change); + pipe_mutex_init(queue->mutex); + } + return queue; +} + + +/** Delete a new bins queue */ +void +lp_bins_queue_destroy(struct lp_bins_queue *queue) +{ + pipe_condvar_destroy(queue->size_change); + pipe_mutex_destroy(queue->mutex); +} + + +/** Remove first lp_bins from head of queue */ +struct lp_bins * +lp_bins_dequeue(struct lp_bins_queue *queue) +{ + struct lp_bins *bins; + unsigned i; + + pipe_mutex_lock(queue->mutex); + while (queue->size == 0) { + pipe_condvar_wait(queue->size_change, queue->mutex); + } + + assert(queue->size >= 1); + + /* get head */ + bins = queue->bins[0]; + + /* shift entries */ + for (i = 0; i < queue->size - 1; i++) { + queue->bins[i] = queue->bins[i + 1]; + } + + queue->size--; + + /* signal size change */ + pipe_condvar_signal(queue->size_change); + + pipe_mutex_unlock(queue->mutex); + + return bins; +} + + +/** Add an lp_bins to tail of queue */ +void +lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) +{ + pipe_mutex_lock(queue->mutex); + + assert(queue->size < MAX_BINS); + + /* add to end */ + queue->bins[queue->size++] = bins; + + /* signal size change */ + pipe_condvar_signal(queue->size_change); + + pipe_mutex_unlock(queue->mutex); +} + + +/** Return number of entries in the queue */ +unsigned +lp_bins_queue_size(struct lp_bins_queue *queue) +{ + unsigned sz; + pipe_mutex_lock(queue->mutex); + sz = queue->size; + pipe_mutex_unlock(queue->mutex); + return sz; +} + + +/** Wait until the queue as 'size' entries */ +void +lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size) +{ + pipe_mutex_lock(queue->mutex); + while (queue->size != size) { + pipe_condvar_wait(queue->size_change, 
queue->mutex); + } + pipe_mutex_unlock(queue->mutex); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h new file mode 100644 index 0000000000..8946a54158 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_BIN_QUEUE +#define LP_BIN_QUEUE + +struct lp_bin_queue; +struct lp_bins; + + +struct lp_bins_queue * +lp_bins_queue_create(void); + +void +lp_bins_queue_destroy(struct lp_bins_queue *queue); + +struct lp_bins * +lp_bins_dequeue(struct lp_bins_queue *queue); + +void +lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); + +unsigned +lp_bins_queue_size(struct lp_bins_queue *queue); + +void +lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size); + + +#endif /* LP_BIN_QUEUE */ -- cgit v1.2.3 From d7dbc666367438ee9efe748505907b36bba6b66a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 14:53:33 -0700 Subject: llvmpipe: checkpoint: begin plugging in bin queue code --- src/gallium/drivers/llvmpipe/lp_rast.c | 12 ++++++++- src/gallium/drivers/llvmpipe/lp_rast.h | 4 ++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 4 +++ src/gallium/drivers/llvmpipe/lp_setup.c | 33 ++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +++++- 5 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7cd046cc39..0471ad7e2f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -29,6 +29,7 @@ #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "lp_bin_queue.h" #include "lp_debug.h" #include "lp_state.h" #include "lp_rast.h" @@ -655,7 +656,13 @@ create_rast_threads(struct lp_rasterizer *rast) -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) +/** + * Create new lp_rasterizer. + * \param empty the queue to put empty bins on after we've finished + * processing them. 
+ */ +struct lp_rasterizer * +lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) { struct lp_rasterizer *rast; unsigned i; @@ -666,6 +673,9 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) rast->screen = screen; + rast->empty_bins = empty; + rast->full_bins = lp_bins_queue_create(); + for (i = 0; i < Elements(rast->tasks); i++) { rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 25e7f8e008..0000fbc5c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -48,6 +48,7 @@ */ struct lp_rasterizer; struct lp_bins; +struct lp_bins_queue; struct cmd_bin; struct pipe_screen; @@ -130,7 +131,8 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, + struct lp_bins_queue *empty ); void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5502419a92..4e4f8b36a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -95,6 +95,10 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; + struct lp_bins_queue *full_bins; + struct lp_bins_queue *empty_bins; + pipe_mutex get_bin_mutex; + /* Framebuffer stuff */ struct pipe_screen *screen; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 484a609e6e..c8cdc32853 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -37,6 +37,8 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "lp_bin.h" +#include "lp_bin_queue.h" #include "lp_debug.h" 
#include "lp_state.h" #include "lp_buffer.h" @@ -44,6 +46,10 @@ #include "lp_setup_context.h" +/** XXX temporary value, temporary here */ +#define MAX_BINS 2 + + static void set_state( struct setup_context *, unsigned ); @@ -554,6 +560,14 @@ lp_setup_destroy( struct setup_context *setup ) lp_bins_destroy(setup->bins); + /* free the bins in the 'empty' queue */ + while (lp_bins_queue_size(setup->empty_bins) > 0) { + struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); + if (!bins) + break; + lp_bins_destroy(bins); + } + lp_rast_destroy( setup->rast ); FREE( setup ); @@ -567,14 +581,28 @@ lp_setup_destroy( struct setup_context *setup ) struct setup_context * lp_setup_create( struct pipe_screen *screen ) { + unsigned i; struct setup_context *setup = CALLOC_STRUCT(setup_context); - setup->rast = lp_rast_create( screen ); + if (!setup) + return NULL; + + setup->empty_bins = lp_bins_queue_create(); + if (!setup->empty_bins) + goto fail; + + setup->rast = lp_rast_create( screen, setup->empty_bins ); if (!setup->rast) goto fail; setup->bins = lp_bins_create(); + /* create some empty bins */ + for (i = 0; i < MAX_BINS; i++) { + struct lp_bins *bins = lp_bins_create(); + lp_bins_enqueue(setup->empty_bins, bins); + } + setup->triangle = first_triangle; setup->line = first_line; setup->point = first_point; @@ -584,6 +612,9 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: + if (setup->empty_bins) + lp_bins_queue_destroy(setup->empty_bins); + FREE(setup); return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 782c05122c..584e37665b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -46,6 +46,9 @@ #define LP_SETUP_NEW_BLEND_COLOR 0x04 +struct lp_bins_queue; + + /** * Point/line/triangle setup context. 
* Note: "stored" below indicates data which is stored in the bins, @@ -55,7 +58,9 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins *bins; + + struct lp_bins *bins; /**< current bins */ + struct lp_bins_queue *empty_bins; /**< queue of empty bins */ boolean ccw_is_frontface; unsigned cullmode; -- cgit v1.2.3 From 288ea9770a2c9323ffa9a4b9f3a818d8aa02acd9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 14:53:53 -0700 Subject: progs/demos: call glutDestroyWindow() in gloss.c --- progs/demos/gloss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/progs/demos/gloss.c b/progs/demos/gloss.c index 69694b23a0..56d48b5a9e 100644 --- a/progs/demos/gloss.c +++ b/progs/demos/gloss.c @@ -41,6 +41,7 @@ /* for convolution */ #define FILTER_SIZE 7 +static GLint Win; static GLint WinWidth = 500, WinHeight = 500; static GLuint CylinderObj = 0; static GLuint TeapotObj = 0; @@ -215,6 +216,7 @@ static void Key( unsigned char key, int x, int y ) ToggleAnimate(); break; case 27: + glutDestroyWindow(Win); exit(0); break; } @@ -439,7 +441,7 @@ int main( int argc, char *argv[] ) glutInit( &argc, argv ); glutInitWindowSize(WinWidth, WinHeight); glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH ); - glutCreateWindow(argv[0] ); + Win = glutCreateWindow(argv[0] ); glewInit(); glutReshapeFunc( Reshape ); glutKeyboardFunc( Key ); -- cgit v1.2.3 From 3bee8c2e7c17893f91f6b62e2db090ef495dca9d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:02:30 -0700 Subject: llvmpipe: use the empty_bins queue now --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 ++++++++ src/gallium/drivers/llvmpipe/lp_setup.c | 19 +++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0471ad7e2f..3165128f8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -569,6 +569,10 @@ 
lp_rasterize_bins( struct lp_rasterizer *rast, /* no threading */ lp_bin_iter_begin( bins ); rasterize_bins( rast, 0, bins, fb, write_depth ); + + /* reset bins and put into the empty queue */ + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins); } else { /* threaded rendering! */ @@ -589,6 +593,10 @@ lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } + + /* reset bins and put into the empty queue */ + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins); } lp_rast_end( rast ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c8cdc32853..889f92a0d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,17 @@ static void set_state( struct setup_context *, unsigned ); struct lp_bins * lp_setup_get_current_bins(struct setup_context *setup) { - /* XXX eventually get bin from queue */ + if (!setup->bins) { + /* wait for a free/empty bin */ + setup->bins = lp_bins_dequeue(setup->empty_bins); + if(0)lp_reset_bins( setup->bins ); /* XXX temporary? 
*/ + + if (setup->fb) { + unsigned tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + unsigned tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + lp_bin_set_num_bins(setup->bins, tiles_x, tiles_y); + } + } return setup->bins; } @@ -101,7 +111,8 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins( setup->bins ); + /* no current bin */ + setup->bins = NULL; /* Reset some state: */ @@ -558,8 +569,6 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - lp_bins_destroy(setup->bins); - /* free the bins in the 'empty' queue */ while (lp_bins_queue_size(setup->empty_bins) > 0) { struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); @@ -595,8 +604,6 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; - setup->bins = lp_bins_create(); - /* create some empty bins */ for (i = 0; i < MAX_BINS; i++) { struct lp_bins *bins = lp_bins_create(); -- cgit v1.2.3 From edf11da7f8e2fbe090e60e58c12c6a5ece3089bc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:23:30 -0700 Subject: progs/demos/gloss: press 'n' to advance by one frame --- progs/demos/gloss.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/progs/demos/gloss.c b/progs/demos/gloss.c index 56d48b5a9e..d32e8f8c68 100644 --- a/progs/demos/gloss.c +++ b/progs/demos/gloss.c @@ -215,6 +215,9 @@ static void Key( unsigned char key, int x, int y ) case ' ': ToggleAnimate(); break; + case 'n': + Idle(); + break; case 27: glutDestroyWindow(Win); exit(0); -- cgit v1.2.3 From ad3c16c127f167513a136759a1700e111a0ef7b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:30:05 -0700 Subject: llvmpipe: simplify the tiles_x, tiles_y code a bit --- src/gallium/drivers/llvmpipe/lp_bin.c | 10 ++++++---- src/gallium/drivers/llvmpipe/lp_bin.h | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup.c | 12 +++--------- 3 files changed, 11 insertions(+), 
16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index f2d3c2df4d..703cdd2de5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/u_math.h" #include "util/u_memory.h" #include "lp_bin.h" @@ -137,13 +138,14 @@ lp_free_bin_data(struct lp_bins *bins) void -lp_bin_set_num_bins( struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y ) +lp_bin_set_framebuffer_size( struct lp_bins *bins, + unsigned width, unsigned height ) { - bins->tiles_x = tiles_x; - bins->tiles_y = tiles_y; + bins->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; + bins->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; } + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index c49b0264d6..4394e7bda0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -131,9 +131,8 @@ void lp_reset_bins(struct lp_bins *bins ); void lp_free_bin_data(struct lp_bins *bins); -void -lp_bin_set_num_bins( struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y ); +void lp_bin_set_framebuffer_size( struct lp_bins *bins, + unsigned width, unsigned height ); void lp_bin_new_data_block( struct data_block_list *list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 889f92a0d5..3ef9cdaa0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -34,7 +34,6 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" #include "lp_bin.h" @@ -62,9 +61,8 @@ lp_setup_get_current_bins(struct setup_context *setup) if(0)lp_reset_bins( setup->bins ); /* XXX temporary? 
*/ if (setup->fb) { - unsigned tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - unsigned tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - lp_bin_set_num_bins(setup->bins, tiles_x, tiles_y); + lp_bin_set_framebuffer_size(setup->bins, + setup->fb->width, setup->fb->height); } } return setup->bins; @@ -244,7 +242,6 @@ lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { struct lp_bins *bins = lp_setup_get_current_bins(setup); - unsigned tiles_x, tiles_y; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -252,10 +249,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb = fb; - tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - - lp_bin_set_num_bins(bins, tiles_x, tiles_y); + lp_bin_set_framebuffer_size(bins, setup->fb->width, setup->fb->height); } -- cgit v1.2.3 From 96689d451a24753e088f40fb167c3cb26d8045ac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:32:32 -0700 Subject: llvmpipe: added some debug/info code --- src/gallium/drivers/llvmpipe/lp_rast.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3165128f8f..5659ae2ca5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,6 +40,7 @@ + /** * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. 
@@ -555,8 +556,22 @@ lp_rasterize_bins( struct lp_rasterizer *rast, const struct pipe_framebuffer_state *fb, bool write_depth ) { + boolean debug = false; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + if (debug) { + unsigned x, y; + printf("rasterize bins:\n"); + printf(" data size: %u\n", lp_bin_data_size(bins)); + for (y = 0; y < bins->tiles_y; y++) { + for (x = 0; x < bins->tiles_x; x++) { + printf(" bin %u, %u size: %u\n", x, y, + lp_bin_cmd_size(bins, x, y)); + } + } + } + lp_rast_begin( rast, fb->cbufs[0], fb->zsbuf, -- cgit v1.2.3 From 21008441e4609c8590ede093a549ef689516ddd4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 10:59:46 -0700 Subject: llvmpipe: updated comments --- src/gallium/drivers/llvmpipe/lp_rast.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 0000fbc5c7..75f7b74404 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -41,17 +41,13 @@ #include "lp_jit.h" -/* Initially create and program a single rasterizer directly. Later - * will want multiple of these, one or two per core. At that stage - * will probably pass command buffers into the rasterizers rather than - * individual function calls like this. 
- */ struct lp_rasterizer; struct lp_bins; struct lp_bins_queue; struct cmd_bin; struct pipe_screen; +/** For sub-pixel positioning */ #define FIXED_ORDER 4 #define FIXED_ONE (1< Date: Thu, 10 Dec 2009 14:54:32 -0700 Subject: llvmpipe: added some bin queue debug code --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index 19e1a5827b..0fda0b5bae 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -122,6 +122,14 @@ lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) assert(queue->size < MAX_BINS); + /* debug: check that bins is not already in the queue */ + if (0) { + unsigned i; + for (i = 0; i < queue->size; i++) { + assert(queue->bins[i] != bins); + } + } + /* add to end */ queue->bins[queue->size++] = bins; -- cgit v1.2.3 From a67f39810b5c88367ae2a9ee564b1a740b27601b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:54:57 -0700 Subject: gallium/util: added framebuffer compare, copy util funcs --- src/gallium/auxiliary/util/u_surface.c | 49 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_surface.h | 9 +++++++ 2 files changed, 58 insertions(+) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e..a95b887e84 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" +#include "util/u_memory.h" #include "util/u_surface.h" @@ -111,3 +112,51 @@ util_destroy_rgba_surface(struct pipe_texture *texture, pipe_texture_reference(&texture, NULL); } + + +/** + * Compare pipe_framebuffer_state objects. 
+ * \return TRUE if same, FALSE if different + */ +boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + boolean changed = FALSE; + unsigned i; + + for (i = 0; i < Elements(src->cbufs); i++) { + if (dst->cbufs[i] != src->cbufs[i]) { + changed = TRUE; + } + } + + if (dst->nr_cbufs != src->nr_cbufs) { + changed = TRUE; + } + + if (dst->zsbuf != src->zsbuf) { + changed = TRUE; + } + + return changed; +} + + +/** + * Copy framebuffer state from src to dst, updating refcounts. + */ +void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + for (i = 0; i < Elements(src->cbufs); i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + + dst->nr_cbufs = src->nr_cbufs; + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index ce84ed7ad0..a9da9aadcb 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -66,4 +66,13 @@ util_destroy_rgba_surface(struct pipe_texture *texture, struct pipe_surface *surface); +extern boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + +extern void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + + #endif /* U_SURFACE_H */ -- cgit v1.2.3 From 6d810e5a7b082b9769a4ede4661536ae0e070dd2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:55:28 -0700 Subject: llvmpipe: simplify llvmpipe_set_framebuffer_state() --- src/gallium/drivers/llvmpipe/lp_state_surface.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 3eff40e3f1..0263f2a624 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell */ +#include "pipe/p_state.h" +#include "util/u_surface.h" #include "lp_context.h" #include "lp_state.h" #include "lp_surface.h" @@ -44,27 +46,12 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct llvmpipe_context *lp = llvmpipe_context(pipe); - uint i; - boolean dirty = FALSE; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { - pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); - dirty = TRUE; - } - } + boolean changed = util_framebuffer_state_equal(&lp->framebuffer, fb); - if (lp->framebuffer.nr_cbufs != fb->nr_cbufs) { - dirty = TRUE; - lp->framebuffer.nr_cbufs = fb->nr_cbufs; - } + if (changed) { - /* zbuf changing? */ - if (lp->framebuffer.zsbuf != fb->zsbuf) { - dirty = TRUE; - - /* assign new */ - pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); + util_copy_framebuffer_state(&lp->framebuffer, fb); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { @@ -80,9 +67,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } draw_set_mrd(lp->draw, mrd); } - } - if (dirty) { lp_setup_bind_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; -- cgit v1.2.3 From 9a6567f1ed88727545f747e8670b713f17627c94 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:56:11 -0700 Subject: llvmpipe: updated comment --- src/gallium/drivers/llvmpipe/lp_setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3ef9cdaa0c..d976934a5d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -280,9 +280,10 @@ lp_setup_clear( struct setup_context *setup, if (setup->state == SETUP_ACTIVE) { /* Add the 
clear to existing bins. In the unusual case where - * both color and depth-stencilare being cleared, we could - * discard the currently binned scene and start again, but I - * don't see that as being a common usage. + * both color and depth-stencil are being cleared when there's + * already been some rendering, we could discard the currently + * binned scene and start again, but I don't see that as being + * a common usage. */ if (flags & PIPE_CLEAR_COLOR) lp_bin_everywhere( bins, -- cgit v1.2.3 From 9509f73c2147a9e225b5ef69a646e5dd711573f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:56:30 -0700 Subject: llvmpipe: checkpoint: use empty/full bin queues --- src/gallium/drivers/llvmpipe/lp_rast.c | 64 +++++++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 8 ++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5659ae2ca5..0cd95e0ca7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,6 +40,46 @@ +/** + * Called by rasterization threads to get the next chunk of work. + * We use a lock to make sure that all the threads get the same bins. + */ +static struct lp_bins * +get_next_full_bin( struct lp_rasterizer *rast ) +{ + pipe_mutex_lock( rast->get_bin_mutex ); + if (!rast->curr_bins) { + /* this will wait until there's something in the queue */ + rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + rast->release_count = 0; + + lp_bin_iter_begin( rast->curr_bins ); + } + pipe_mutex_unlock( rast->get_bin_mutex ); + return rast->curr_bins; +} + + +/** + * Called by rasterization threads after they've finished with + * the current bin. When all threads have called this, we reset + * the bin and put it into the 'empty bins' queue. 
+ */ +static void +release_current_bin( struct lp_rasterizer *rast ) +{ + pipe_mutex_lock( rast->get_bin_mutex ); + rast->release_count++; + if (rast->release_count == rast->num_threads) { + assert(rast->curr_bins); + lp_reset_bins( rast->curr_bins ); + lp_bins_enqueue( rast->empty_bins, rast->curr_bins ); + rast->curr_bins = NULL; + } + pipe_mutex_unlock( rast->get_bin_mutex ); +} + + /** * Begin the rasterization phase. @@ -488,6 +528,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast, * Rasterize commands for a single bin. * \param x, y position of the bin's tile in the framebuffer * Must be called between lp_rast_begin() and lp_rast_end(). + * Called per thread. */ static void rasterize_bin( struct lp_rasterizer *rast, @@ -514,6 +555,7 @@ rasterize_bin( struct lp_rasterizer *rast, /** * Rasterize/execute all bins. + * Called per thread. */ static void rasterize_bins( struct lp_rasterizer *rast, @@ -539,6 +581,7 @@ rasterize_bins( struct lp_rasterizer *rast, struct cmd_bin *bin; int x, y; + assert(bins); while ((bin = lp_bin_iter_next(bins, &x, &y))) { rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } @@ -593,11 +636,13 @@ lp_rasterize_bins( struct lp_rasterizer *rast, /* threaded rendering! 
*/ unsigned i; - rast->bins = bins; + lp_bins_enqueue( rast->full_bins, bins ); + + /* XXX need to move/fix these */ rast->fb = fb; rast->write_depth = write_depth; - lp_bin_iter_begin( bins ); + /*lp_bin_iter_begin( bins );*/ /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { @@ -608,10 +653,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } - - /* reset bins and put into the empty queue */ - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins); } lp_rast_end( rast ); @@ -632,19 +673,26 @@ thread_func( void *init_data ) { struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; struct lp_rasterizer *rast = task->rast; - int debug = 0; + boolean debug = false; while (1) { + struct lp_bins *bins; + /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + bins = get_next_full_bin( rast ); + assert(bins); + /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - rast->bins, rast->fb, rast->write_depth); + bins, rast->fb, rast->write_depth); + + release_current_bin( rast ); /* signal done with work */ if (debug) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4e4f8b36a7..f174aa1505 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -95,10 +95,18 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; + /** The incoming queue of filled bins to rasterize */ struct lp_bins_queue *full_bins; + /** The outgoing queue of emptied bins to return to setup modulee */ struct lp_bins_queue *empty_bins; + pipe_mutex get_bin_mutex; + /** The bins currently being rasterized by the threads */ + struct lp_bins *curr_bins; + /** 
Counter to determine when all threads are done with current bin */ + unsigned release_count; + /* Framebuffer stuff */ struct pipe_screen *screen; -- cgit v1.2.3 From 4e67f10331bfd87560e2900e66f3b942902bc65c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:16:48 -0700 Subject: llvmpipe: minor comment fix --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0cd95e0ca7..2c9c13ab22 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -591,7 +591,7 @@ rasterize_bins( struct lp_rasterizer *rast, /** - * Called by rasterizer when it has something for us to render. + * Called by setup module when it has something for us to render. */ void lp_rasterize_bins( struct lp_rasterizer *rast, -- cgit v1.2.3 From 205da96fc64a197b7d1a15010456402030d8893b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:25:22 -0700 Subject: llvmpipe: remove unused fb parameter --- src/gallium/drivers/llvmpipe/lp_rast.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2c9c13ab22..7083029e45 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -561,7 +561,6 @@ static void rasterize_bins( struct lp_rasterizer *rast, unsigned thread_index, struct lp_bins *bins, - const struct pipe_framebuffer_state *fb, bool write_depth ) { /* loop over tile bins, rasterize each */ @@ -626,7 +625,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, if (rast->num_threads == 0) { /* no threading */ lp_bin_iter_begin( bins ); - rasterize_bins( rast, 0, bins, fb, write_depth ); + rasterize_bins( rast, 0, bins, write_depth ); /* reset bins and put into the empty queue */ lp_reset_bins( bins ); @@ -690,7 +689,7 @@ thread_func( void *init_data ) if 
(debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - bins, rast->fb, rast->write_depth); + bins, rast->write_depth); release_current_bin( rast ); -- cgit v1.2.3 From 8f2a1736635368951c3f30e484ee6137066964d6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:39:53 -0700 Subject: gallium/util: simplify util_framebuffer_state_equal() And copy width, height in util_copy_framebuffer_state(). --- src/gallium/auxiliary/util/u_surface.c | 16 +++++++++++----- src/gallium/auxiliary/util/u_surface.h | 10 +++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index a95b887e84..daaa275ef2 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -122,24 +122,27 @@ boolean util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, const struct pipe_framebuffer_state *src) { - boolean changed = FALSE; unsigned i; + if (dst->width != src->width || + dst->height != src->height) + return FALSE; + for (i = 0; i < Elements(src->cbufs); i++) { if (dst->cbufs[i] != src->cbufs[i]) { - changed = TRUE; + return FALSE; } } if (dst->nr_cbufs != src->nr_cbufs) { - changed = TRUE; + return FALSE; } if (dst->zsbuf != src->zsbuf) { - changed = TRUE; + return FALSE; } - return changed; + return TRUE; } @@ -152,6 +155,9 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, { unsigned i; + dst->width = src->width; + dst->height = src->height; + for (i = 0; i < Elements(src->cbufs); i++) { pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); } diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index a9da9aadcb..3c60df2c3e 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -30,11 +30,7 @@ #include "pipe/p_compiler.h" - - -struct pipe_screen; -struct pipe_texture; -struct 
pipe_surface; +#include "pipe/p_state.h" /** @@ -75,4 +71,8 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, const struct pipe_framebuffer_state *src); +extern void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb); + + #endif /* U_SURFACE_H */ -- cgit v1.2.3 From d1fa748cdba0b1145066186b3d634b79b5d69473 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:40:52 -0700 Subject: gallium/util: added util_unreference_framebuffer_state() --- src/gallium/auxiliary/util/u_surface.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index daaa275ef2..cfdf7ab8f8 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -166,3 +166,19 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, pipe_surface_reference(&dst->zsbuf, src->zsbuf); } + + +void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); + + fb->width = fb->height = 0; + fb->nr_cbufs = 0; +} -- cgit v1.2.3 From 544882eb58253a4538ccc90ae091abed353416b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:41:11 -0700 Subject: llvmpipe: fix inverted util_framebuffer_state_equal() result --- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 0263f2a624..21565436eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -47,7 +47,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, { struct llvmpipe_context *lp = llvmpipe_context(pipe); - boolean changed = 
util_framebuffer_state_equal(&lp->framebuffer, fb); + boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); if (changed) { -- cgit v1.2.3 From 9d0faea58cee28cf16bd31e6adbb2d93c391c556 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:42:18 -0700 Subject: llvmpipe: fix-up #includes --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index 0fda0b5bae..b4bc439089 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -34,7 +34,7 @@ #include "pipe/p_thread.h" -#include "lp_bin.h" +#include "util/u_memory.h" #include "lp_bin_queue.h" -- cgit v1.2.3 From 156eabbaf996f471458ee2a69078674277b89067 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:46:23 -0700 Subject: llvmpipe: improve framebuffer/surface code --- src/gallium/drivers/llvmpipe/lp_rast.c | 70 ++++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 5 +-- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7083029e45..97233e1700 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,6 +28,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "util/u_surface.h" #include "lp_bin_queue.h" #include "lp_debug.h" @@ -87,28 +88,25 @@ release_current_bin( struct lp_rasterizer *rast ) */ static boolean lp_rast_begin( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - boolean write_color, - boolean write_zstencil, - unsigned width, - unsigned height ) + const struct pipe_framebuffer_state *fb, + boolean write_color, + boolean write_zstencil ) { struct pipe_screen *screen = rast->screen; + struct pipe_surface *cbuf, *zsbuf; 
- LP_DBG(DEBUG_RAST, "%s %dx%d\n", __FUNCTION__, width, height); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - pipe_surface_reference(&rast->state.cbuf, cbuf); - pipe_surface_reference(&rast->state.zsbuf, zsbuf); + util_copy_framebuffer_state(&rast->state.fb, fb); - rast->width = width; - rast->height = height; rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - rast->check_for_clipped_tiles = (width % TILE_SIZE != 0 || - height % TILE_SIZE != 0); + rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || + fb->height % TILE_SIZE != 0); + /* XXX support multiple color buffers here */ + cbuf = rast->state.fb.cbufs[0]; if (cbuf) { rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, cbuf->texture, @@ -116,7 +114,8 @@ lp_rast_begin( struct lp_rasterizer *rast, cbuf->level, cbuf->zslice, PIPE_TRANSFER_READ_WRITE, - 0, 0, width, height); + 0, 0, + fb->width, fb->height); if (!rast->cbuf_transfer) return FALSE; @@ -126,14 +125,16 @@ lp_rast_begin( struct lp_rasterizer *rast, return FALSE; } + zsbuf = rast->state.fb.zsbuf; if (zsbuf) { rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, width, height); + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + fb->width, fb->height); if (!rast->zsbuf_transfer) return FALSE; @@ -442,11 +443,11 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, int w = TILE_SIZE; int h = TILE_SIZE; - if (x + w > rast->width) - w -= x + w - rast->width; + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; assert(w >= 0); assert(h >= 0); @@ -491,11 +492,11 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast, unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; - if (x + 
w > rast->width) - w -= x + w - rast->width; + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); @@ -614,13 +615,9 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_begin( rast, - fb->cbufs[0], - fb->zsbuf, - fb->cbufs[0] != NULL, - fb->zsbuf != NULL && write_depth, - fb->width, - fb->height ); + lp_rast_begin( rast, fb, + fb->cbufs[0]!= NULL, + fb->zsbuf != NULL && write_depth ); if (rast->num_threads == 0) { /* no threading */ @@ -765,8 +762,7 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) { unsigned i; - pipe_surface_reference(&rast->state.cbuf, NULL); - pipe_surface_reference(&rast->state.zsbuf, NULL); + util_unreference_framebuffer_state(&rast->state.fb); for (i = 0; i < Elements(rast->tasks); i++) { align_free(rast->tasks[i].tile.depth); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f174aa1505..abe791fd00 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -90,8 +90,6 @@ struct lp_rasterizer_task */ struct lp_rasterizer { - unsigned width, height; /**< Size of framebuffer, in pixels */ - boolean clipped_tile; boolean check_for_clipped_tiles; @@ -116,8 +114,7 @@ struct lp_rasterizer void *zsbuf_map; struct { - struct pipe_surface *cbuf; - struct pipe_surface *zsbuf; + struct pipe_framebuffer_state fb; boolean write_color; boolean write_zstencil; unsigned clear_color; -- cgit v1.2.3 From de31b0e60c4b68b73b8983a0ae3e8f3f61d9d583 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:56:22 -0700 Subject: llvmpipe: remove unused lp_rasterizer::fb field --- src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_rast_priv.h | 1 - 2 files changed, 2 deletions(-) diff --git 
a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 97233e1700..a8212d74e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -635,7 +635,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bins_enqueue( rast->full_bins, bins ); /* XXX need to move/fix these */ - rast->fb = fb; rast->write_depth = write_depth; /*lp_bin_iter_begin( bins );*/ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index abe791fd00..4ae54ac8c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -129,7 +129,6 @@ struct lp_rasterizer pipe_thread threads[MAX_THREADS]; struct lp_bins *bins; - const struct pipe_framebuffer_state *fb; boolean write_depth; }; -- cgit v1.2.3 From 0fc90dfa280e12a100c6c7c632d5d29c16118c9a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:00:25 -0700 Subject: gallium: added pipe_barrier type and functions --- src/gallium/include/pipe/p_thread.h | 61 ++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 45c35a87d0..ba5cd589f8 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -27,7 +27,8 @@ /** * @file * - * Thread, mutex, condition var and thread-specific data functions. + * Thread, mutex, condition variable, barrier, semaphore and + * thread-specific data functions. 
*/ @@ -106,6 +107,24 @@ typedef pthread_cond_t pipe_condvar; pthread_cond_broadcast(&(cond)) +typedef pthread_barrier_t pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + pthread_barrier_init(barrier, NULL, count); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + pthread_barrier_destroy(barrier); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + pthread_barrier_wait(barrier); +} + + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #include <windows.h> @@ -162,6 +181,27 @@ typedef unsigned pipe_condvar; #define pipe_condvar_broadcast(condvar) \ (void) condvar + +typedef unsigned pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ + assert(0); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + assert(0); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + + + #else /** Dummy definitions */ @@ -169,6 +209,7 @@ typedef unsigned pipe_condvar; typedef unsigned pipe_thread; typedef unsigned pipe_mutex; typedef unsigned pipe_condvar; +typedef unsigned pipe_barrier; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -204,6 +245,24 @@ typedef unsigned pipe_condvar; (void) condvar +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ + assert(0); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + assert(0); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + + + #endif /* PIPE_OS_? */ -- cgit v1.2.3 From 24d894e5579bd11fdf294d86834093e353abf4db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:07:01 -0700 Subject: gallium: comments and minor re-org in p_thread.h There's more work to do in this file: 1.
Implement condvars for Windows via Win32 CONDITION_VARIABLE type. 2. Implement barriers for Windows 3. Try to get rid of PIPE_THREAD_HAVE_CONDVAR (only used in trace driver) 4. Why the 2 in _P_THREAD2_H_? --- src/gallium/include/pipe/p_thread.h | 40 +++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index ba5cd589f8..8119c1f571 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -47,6 +47,8 @@ #define PIPE_THREAD_HAVE_CONDVAR +/* pipe_thread + */ typedef pthread_t pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -70,8 +72,10 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return pthread_detach( thread ); } + +/* pipe_mutex + */ typedef pthread_mutex_t pipe_mutex; -typedef pthread_cond_t pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = PTHREAD_MUTEX_INITIALIZER @@ -88,6 +92,11 @@ typedef pthread_cond_t pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) pthread_mutex_unlock(&(mutex)) + +/* pipe_condvar + */ +typedef pthread_cond_t pipe_condvar; + #define pipe_static_condvar(mutex) \ static pipe_condvar mutex = PTHREAD_COND_INITIALIZER @@ -107,6 +116,8 @@ typedef pthread_cond_t pipe_condvar; pthread_cond_broadcast(&(cond)) +/* pipe_barrier + */ typedef pthread_barrier_t pipe_barrier; static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) @@ -129,6 +140,8 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) #include <windows.h> +/* pipe_thread + */ typedef HANDLE pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -154,6 +167,9 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return -1; } + +/* pipe_mutex + */ typedef CRITICAL_SECTION pipe_mutex; #define pipe_static_mutex(mutex) \ @@ -171,17 +187,29 @@ typedef CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ LeaveCriticalSection(&mutex) -/*
XXX: dummy definitions, make it compile */ +/* pipe_condvar (XXX FIX THIS) + */ typedef unsigned pipe_condvar; -#define pipe_condvar_init(condvar) \ - (void) condvar +#define pipe_condvar_init(cond) \ + (void) cond -#define pipe_condvar_broadcast(condvar) \ - (void) condvar +#define pipe_condvar_destroy(cond) \ + (void) cond + +#define pipe_condvar_wait(cond, mutex) \ + (void) cond; (void) mutex + +#define pipe_condvar_signal(cond) \ + (void) cond + +#define pipe_condvar_broadcast(cond) \ + (void) cond +/* pipe_barrier (XXX FIX THIS) + */ typedef unsigned pipe_barrier; static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) -- cgit v1.2.3 From 2bce5c195f94e2cce8f67c6a8066b0ae408487ce Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:52:42 -0700 Subject: llvmpipe: checkpoint: more thread/queuing changes Now mapping/unmapping the framebuffer is done by a rasterizer thread rather than the main calling thread. --- src/gallium/drivers/llvmpipe/lp_bin.h | 5 ++ src/gallium/drivers/llvmpipe/lp_rast.c | 130 +++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 24 +++-- 3 files changed, 84 insertions(+), 75 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 4394e7bda0..565dd49f68 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -108,6 +108,11 @@ struct lp_bins { struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; + /** the framebuffer to render the bins into */ + struct pipe_framebuffer_state fb; + + boolean write_depth; + /** * Number of active tiles in each dimension.
* This basically the framebuffer size divided by tile size diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a8212d74e3..2ea3ac6b3b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,48 +40,6 @@ #include "lp_bin.h" - -/** - * Called by rasterization threads to get the next chunk of work. - * We use a lock to make sure that all the threads get the same bins. - */ -static struct lp_bins * -get_next_full_bin( struct lp_rasterizer *rast ) -{ - pipe_mutex_lock( rast->get_bin_mutex ); - if (!rast->curr_bins) { - /* this will wait until there's something in the queue */ - rast->curr_bins = lp_bins_dequeue( rast->full_bins ); - rast->release_count = 0; - - lp_bin_iter_begin( rast->curr_bins ); - } - pipe_mutex_unlock( rast->get_bin_mutex ); - return rast->curr_bins; -} - - -/** - * Called by rasterization threads after they've finished with - * the current bin. When all threads have called this, we reset - * the bin and put it into the 'empty bins' queue. - */ -static void -release_current_bin( struct lp_rasterizer *rast ) -{ - pipe_mutex_lock( rast->get_bin_mutex ); - rast->release_count++; - if (rast->release_count == rast->num_threads) { - assert(rast->curr_bins); - lp_reset_bins( rast->curr_bins ); - lp_bins_enqueue( rast->empty_bins, rast->curr_bins ); - rast->curr_bins = NULL; - } - pipe_mutex_unlock( rast->get_bin_mutex ); -} - - - /** * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. @@ -525,6 +483,22 @@ lp_rast_end_tile( struct lp_rasterizer *rast, } +/** + * When all the threads are done rasterizing a bin, one thread will + * call this function to reset the bin and put it onto the empty queue. 
+ */ +static void +release_bins( struct lp_rasterizer *rast, + struct lp_bins *bins ) +{ + util_unreference_framebuffer_state( &bins->fb ); + + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins ); + rast->curr_bins = NULL; +} + + /** * Rasterize commands for a single bin. * \param x, y position of the bin's tile in the framebuffer @@ -615,18 +589,23 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_begin( rast, fb, - fb->cbufs[0]!= NULL, - fb->zsbuf != NULL && write_depth ); + /* save framebuffer state in the bin */ + util_copy_framebuffer_state(&bins->fb, fb); + bins->write_depth = write_depth; if (rast->num_threads == 0) { /* no threading */ + + lp_rast_begin( rast, fb, + fb->cbufs[0]!= NULL, + fb->zsbuf != NULL && write_depth ); + lp_bin_iter_begin( bins ); rasterize_bins( rast, 0, bins, write_depth ); - /* reset bins and put into the empty queue */ - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins); + release_bins( rast, bins ); + + lp_rast_end( rast ); } else { /* threaded rendering! 
*/ @@ -634,11 +613,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bins_enqueue( rast->full_bins, bins ); - /* XXX need to move/fix these */ - rast->write_depth = write_depth; - - /*lp_bin_iter_begin( bins );*/ - /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); @@ -650,8 +624,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_end( rast ); - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -671,23 +643,53 @@ thread_func( void *init_data ) boolean debug = false; while (1) { - struct lp_bins *bins; - /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); - bins = get_next_full_bin( rast ); - assert(bins); + if (task->thread_index == 0) { + /* thread[0]: + * - get next set of bins to rasterize + * - map the framebuffer surfaces + */ + const struct pipe_framebuffer_state *fb; + boolean write_depth; + + rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + + lp_bin_iter_begin( rast->curr_bins ); + + fb = &rast->curr_bins->fb; + write_depth = rast->curr_bins->write_depth; + + lp_rast_begin( rast, fb, + fb->cbufs[0] != NULL, + fb->zsbuf != NULL && write_depth ); + } + + /* Wait for all threads to get here so that threads[1+] don't + * get a null rast->curr_bins pointer. 
+ */ + pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - bins, rast->write_depth); + rast->curr_bins, rast->curr_bins->write_depth); - release_current_bin( rast ); + /* wait for all threads to finish with this set of bins */ + pipe_barrier_wait( &rast->barrier ); + + if (task->thread_index == 0) { + /* thread[0]: + * - release the bins object + * - unmap the framebuffer surfaces + */ + release_bins( rast, rast->curr_bins ); + lp_rast_end( rast ); + } /* signal done with work */ if (debug) @@ -751,6 +753,9 @@ lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) create_rast_threads(rast); + /* for synchronizing rasterization threads */ + pipe_barrier_init( &rast->barrier, rast->num_threads ); + return rast; } @@ -768,6 +773,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) align_free(rast->tasks[i].tile.color); } + /* for synchronizing rasterization threads */ + pipe_barrier_destroy( &rast->barrier ); + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4ae54ac8c1..ba14fc3675 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -93,18 +93,6 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; - /** The incoming queue of filled bins to rasterize */ - struct lp_bins_queue *full_bins; - /** The outgoing queue of emptied bins to return to setup modulee */ - struct lp_bins_queue *empty_bins; - - pipe_mutex get_bin_mutex; - - /** The bins currently being rasterized by the threads */ - struct lp_bins *curr_bins; - /** Counter to determine when all threads are done with current bin */ - unsigned release_count; - /* Framebuffer stuff */ struct pipe_screen *screen; @@ -122,14 +110,22 @@ struct lp_rasterizer char clear_stencil; } state; + /** The incoming queue of filled bins to 
rasterize */ + struct lp_bins_queue *full_bins; + /** The outgoing queue of emptied bins to return to setup modulee */ + struct lp_bins_queue *empty_bins; + + /** The bins currently being rasterized by the threads */ + struct lp_bins *curr_bins; + /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; unsigned num_threads; pipe_thread threads[MAX_THREADS]; - struct lp_bins *bins; - boolean write_depth; + /** For synchronizing the rasterization threads */ + pipe_barrier barrier; }; -- cgit v1.2.3 From 92dc0f92b0f0fa2f3e4ba832ef2232169ce19ce8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 15:00:28 -0700 Subject: llvmpipe: implement lp_rast_load_color() --- src/gallium/drivers/llvmpipe/lp_rast.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2ea3ac6b3b..9020cf9cec 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -210,9 +210,31 @@ void lp_rast_load_color( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg) { - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + int w = TILE_SIZE; + int h = TILE_SIZE; + + LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; - /* call u_tile func to load colors from surface */ + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + lp_tile_read_4ub(rast->cbuf_transfer->format, + rast->tasks[thread_index].tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + w, h); } -- cgit v1.2.3 From edbaca6fd14dace44637d994bbddad3cb0a5fafe Mon Sep 17 00:00:00 
2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:32:08 -0700 Subject: llvmpipe: initial fence implementation --- src/gallium/drivers/llvmpipe/lp_fence.c | 109 ++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_fence.h | 60 ++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_fence.c create mode 100644 src/gallium/drivers/llvmpipe/lp_fence.h diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c new file mode 100644 index 0000000000..14fbea6d99 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "lp_fence.h" + + +struct lp_fence * +lp_fence_create(unsigned rank) +{ + struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + + pipe_reference_init(&fence->reference, 1); + + pipe_mutex_init(fence->mutex); + pipe_condvar_init(fence->signalled); + + fence->rank = rank; + + return fence; +} + + +static void +lp_fence_destroy(struct lp_fence *fence) +{ + pipe_mutex_destroy(fence->mutex); + pipe_condvar_destroy(fence->signalled); + FREE(fence); +} + + +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { + lp_fence_destroy(old); + } +} + + +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + + return f->count == f->rank; +} + + +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *fence = (struct lp_fence *) fence_handle; + + pipe_mutex_lock(fence->mutex); + while (fence->count < fence->rank) { + pipe_condvar_wait(fence->signalled, fence->mutex); + } + pipe_mutex_unlock(fence->mutex); + + return 0; +} + + + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +{ + screen->fence_reference = llvmpipe_fence_reference; + screen->fence_signalled = llvmpipe_fence_signalled; + screen->fence_finish = llvmpipe_fence_finish; +} diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h new file mode 100644 index 0000000000..d45318f9e4 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -0,0 +1,60 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_FENCE_H +#define LP_FENCE_H + + +#include "pipe/p_refcnt.h" +#include "pipe/p_thread.h" + + +struct pipe_screen; + + +struct lp_fence +{ + struct pipe_reference reference; + + pipe_mutex mutex; + pipe_condvar signalled; + + unsigned rank; + unsigned count; +}; + + +struct lp_fence * +lp_fence_create(unsigned rank); + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); + + +#endif /* LP_FENCE_H */ -- cgit v1.2.3 From 2876b684de39dbdf803b3f3d1ac231e76fb4357e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:32:45 -0700 Subject: llvmpipe: remove old fence code, compile new lp_fence.c file --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_buffer.c | 31 ------------------------------- src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4cc4c88ffd..7c4cf320b9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ lp_clear.c \ lp_context.c \ lp_draw_arrays.c \ + lp_fence.c \ lp_flush.c \ lp_jit.c \ lp_prim_vbuf.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 19ef686167..bc725b65f6 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -45,6 +45,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_clear.c', 'lp_context.c', 'lp_draw_arrays.c', + 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c index 66f1f8e138..a5ef221a21 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c @@ -108,32 +108,6 @@ 
llvmpipe_user_buffer_create(struct pipe_screen *screen, } -static void -llvmpipe_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -llvmpipe_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - void llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) { @@ -142,9 +116,4 @@ llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) screen->buffer_map = llvmpipe_buffer_map; screen->buffer_unmap = llvmpipe_buffer_unmap; screen->buffer_destroy = llvmpipe_buffer_destroy; - - screen->fence_reference = llvmpipe_fence_reference; - screen->fence_signalled = llvmpipe_fence_signalled; - screen->fence_finish = llvmpipe_fence_finish; - } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 87fddbd13f..017496ea5f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -32,6 +32,7 @@ #include "lp_texture.h" #include "lp_buffer.h" +#include "lp_fence.h" #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" @@ -253,6 +254,7 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) llvmpipe_init_screen_texture_funcs(&screen->base); llvmpipe_init_screen_buffer_funcs(&screen->base); + llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); -- cgit v1.2.3 From 8736ee1e7d45c2a3868d46b2ecba7471518cd9b6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:33:30 -0700 Subject: llvmpipe: added lp_bin_get_num_bins() --- src/gallium/drivers/llvmpipe/lp_bin.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 565dd49f68..e763b16ffe 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -258,6 +258,13 @@ lp_bin_state_command( struct lp_bins *bins, const union lp_rast_cmd_arg arg ); +static INLINE unsigned +lp_bin_get_num_bins( const struct lp_bins *bins ) +{ + return bins->tiles_x * bins->tiles_y; +} + + void lp_bin_iter_begin( struct lp_bins *bins ); -- cgit v1.2.3 From 932374073863379e9da862d6115410889f038154 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:45:52 -0700 Subject: llvmpipe: added lp_rast_get_num_threads() --- src/gallium/drivers/llvmpipe/lp_rast.c | 7 +++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9020cf9cec..6b7aa8d729 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -801,3 +801,10 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) FREE(rast); } + +/** Return number of rasterization threads */ +unsigned +lp_rast_get_num_threads( struct lp_rasterizer *rast ) +{ + return rast->num_threads; +} diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 75f7b74404..785be49b70 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -133,6 +133,8 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, void lp_rast_destroy( struct lp_rasterizer * ); +unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); + void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, const struct pipe_framebuffer_state *fb, -- cgit v1.2.3 From 4b70af918dd9040a6987c6a55e76e49f0e3f90bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:57:45 -0700 Subject: llvmpipe: added lp_rast_fence() bin function --- src/gallium/drivers/llvmpipe/lp_rast.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 14 ++++++++++++++ 2 
files changed, 39 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6b7aa8d729..3e7b3d7ab4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -32,6 +32,7 @@ #include "lp_bin_queue.h" #include "lp_debug.h" +#include "lp_fence.h" #include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" @@ -505,6 +506,30 @@ lp_rast_end_tile( struct lp_rasterizer *rast, } +/** + * Signal on a fence. This is called during bin execution/rasterization. + * Called per thread. + */ +void lp_rast_fence( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_fence *fence = arg.fence; + + pipe_mutex_lock( fence->mutex ); + + fence->count++; + assert(fence->count <= fence->rank); + + LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + pipe_condvar_signal( fence->signalled ); + + pipe_mutex_unlock( fence->mutex ); +} + + /** * When all the threads are done rasterizing a bin, one thread will * call this function to reset the bin and put it onto the empty queue. 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 785be49b70..bd8f1ae1c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -44,6 +44,7 @@ struct lp_rasterizer; struct lp_bins; struct lp_bins_queue; +struct lp_fence; struct cmd_bin; struct pipe_screen; @@ -148,6 +149,7 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; unsigned clear_zstencil; + struct lp_fence *fence; }; @@ -177,6 +179,15 @@ lp_rast_arg_state( const struct lp_rast_state *state ) return arg; } +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_fence( struct lp_fence *fence ) +{ + union lp_rast_cmd_arg arg; + arg.fence = fence; + return arg; +} + + static INLINE const union lp_rast_cmd_arg lp_rast_arg_null( void ) { @@ -221,5 +232,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *, unsigned thread_index, const union lp_rast_cmd_arg ); +void lp_rast_fence( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); #endif -- cgit v1.2.3 From 6cbb1219a3f6b83ee4d24aecb61f5b5943e3cac3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:59:26 -0700 Subject: llvmpipe: checkpoint: plug in the new fencing code This has only been very lightly tested. More work to come. 
--- src/gallium/drivers/llvmpipe/lp_flush.c | 20 +++++++++++++++++--- src/gallium/drivers/llvmpipe/lp_setup.c | 23 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup.h | 4 ++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index f7a1d89701..e6519cb216 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -49,6 +49,23 @@ llvmpipe_flush( struct pipe_context *pipe, draw_flush(llvmpipe->draw); + if (fence) { + if ((flags & (PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_RENDER_CACHE))) { + /* if we're going to flush the setup/rasterization modules, emit + * a fence. + * XXX this (and the code below) may need fine tuning... + */ + *fence = lp_setup_fence( llvmpipe->setup ); + } + else { + *fence = NULL; + } + } + + /* XXX the lp_setup_flush(flags) param is not a bool, and it's ignored + * at this time! + */ if (flags & PIPE_FLUSH_SWAPBUFFERS) { lp_setup_flush( llvmpipe->setup, FALSE ); } @@ -68,8 +85,5 @@ llvmpipe_flush( struct pipe_context *pipe, ++frame_no; } #endif - - if (fence) - *fence = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d976934a5d..3967b4f21e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,6 +39,7 @@ #include "lp_bin.h" #include "lp_bin_queue.h" #include "lp_debug.h" +#include "lp_fence.h" #include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" @@ -308,6 +309,28 @@ lp_setup_clear( struct setup_context *setup, } +/** + * Emit a fence. 
+ */ +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ) +{ + struct lp_bins *bins = lp_setup_get_current_bins(setup); + const unsigned rank = lp_bin_get_num_bins( bins ); + struct lp_fence *fence = lp_fence_create(rank); + + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + + set_state( setup, SETUP_ACTIVE ); + + /* insert the fence into all command bins */ + lp_bin_everywhere( bins, + lp_rast_fence, + lp_rast_arg_fence(fence) ); + + return (struct pipe_fence_handle *) fence; +} + void lp_setup_set_triangle_state( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 66a7f29f1e..5c606e86af 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -67,6 +67,10 @@ lp_setup_clear(struct setup_context *setup, unsigned clear_stencil, unsigned flags); +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ); + + void lp_setup_tri(struct setup_context *setup, const float (*v0)[4], -- cgit v1.2.3 From 314d3cd751448f9ae36126937b3bbf0330542da3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 12 Dec 2009 20:19:46 +0000 Subject: llvmpipe: rename one of the two rasterize_bins functions --- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3967b4f21e..6d20975cb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -128,8 +128,8 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all tile's bins */ static void -rasterize_bins( struct setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_bins( struct setup_context *setup, + boolean write_depth ) { struct lp_bins *bins = lp_setup_get_current_bins(setup); @@ -189,7 +189,7 @@ execute_clears( struct setup_context *setup ) 
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - rasterize_bins( setup, TRUE ); + lp_setup_rasterize_bins( setup, TRUE ); } @@ -220,7 +220,7 @@ set_state( struct setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - rasterize_bins( setup, TRUE ); + lp_setup_rasterize_bins( setup, TRUE ); break; } -- cgit v1.2.3 From 39dd7108bf6014a8430dffc290e98c7b47432cd3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 12 Dec 2009 20:29:39 +0000 Subject: llvmpipe: rename queue size to count --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 44 ++++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_bin_queue.h | 4 +-- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index b4bc439089..b39b46b72b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -51,9 +51,9 @@ struct lp_bins_queue * probably always be pretty short. 
*/ struct lp_bins *bins[MAX_BINS]; - unsigned size; + unsigned count; - pipe_condvar size_change; + pipe_condvar count_change; pipe_mutex mutex; }; @@ -65,7 +65,7 @@ lp_bins_queue_create(void) { struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); if (queue) { - pipe_condvar_init(queue->size_change); + pipe_condvar_init(queue->count_change); pipe_mutex_init(queue->mutex); } return queue; @@ -76,7 +76,7 @@ lp_bins_queue_create(void) void lp_bins_queue_destroy(struct lp_bins_queue *queue) { - pipe_condvar_destroy(queue->size_change); + pipe_condvar_destroy(queue->count_change); pipe_mutex_destroy(queue->mutex); } @@ -89,24 +89,24 @@ lp_bins_dequeue(struct lp_bins_queue *queue) unsigned i; pipe_mutex_lock(queue->mutex); - while (queue->size == 0) { - pipe_condvar_wait(queue->size_change, queue->mutex); + while (queue->count == 0) { + pipe_condvar_wait(queue->count_change, queue->mutex); } - assert(queue->size >= 1); + assert(queue->count >= 1); /* get head */ bins = queue->bins[0]; /* shift entries */ - for (i = 0; i < queue->size - 1; i++) { + for (i = 0; i < queue->count - 1; i++) { queue->bins[i] = queue->bins[i + 1]; } - queue->size--; + queue->count--; /* signal size change */ - pipe_condvar_signal(queue->size_change); + pipe_condvar_signal(queue->count_change); pipe_mutex_unlock(queue->mutex); @@ -120,21 +120,21 @@ lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) { pipe_mutex_lock(queue->mutex); - assert(queue->size < MAX_BINS); + assert(queue->count < MAX_BINS); /* debug: check that bins is not already in the queue */ if (0) { unsigned i; - for (i = 0; i < queue->size; i++) { + for (i = 0; i < queue->count; i++) { assert(queue->bins[i] != bins); } } /* add to end */ - queue->bins[queue->size++] = bins; + queue->bins[queue->count++] = bins; /* signal size change */ - pipe_condvar_signal(queue->size_change); + pipe_condvar_signal(queue->count_change); pipe_mutex_unlock(queue->mutex); } @@ -142,23 +142,23 @@ lp_bins_enqueue(struct 
lp_bins_queue *queue, struct lp_bins *bins) /** Return number of entries in the queue */ unsigned -lp_bins_queue_size(struct lp_bins_queue *queue) +lp_bins_queue_count(struct lp_bins_queue *queue) { - unsigned sz; + unsigned count; pipe_mutex_lock(queue->mutex); - sz = queue->size; + count = queue->count; pipe_mutex_unlock(queue->mutex); - return sz; + return count; } -/** Wait until the queue as 'size' entries */ +/** Wait until the queue has exactly 'count' entries */ void -lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size) +lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned count) { pipe_mutex_lock(queue->mutex); - while (queue->size != size) { - pipe_condvar_wait(queue->size_change, queue->mutex); + while (queue->count != count) { + pipe_condvar_wait(queue->count_change, queue->mutex); } pipe_mutex_unlock(queue->mutex); } diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h index 8946a54158..1a0f8832db 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.h @@ -46,10 +46,10 @@ void lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); unsigned -lp_bins_queue_size(struct lp_bins_queue *queue); +lp_bins_queue_count(struct lp_bins_queue *queue); void -lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size); +lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned size); #endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6d20975cb8..0972c16784 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -588,7 +588,7 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); /* free the bins in the 'empty' queue */ - while (lp_bins_queue_size(setup->empty_bins) > 0) { + while (lp_bins_queue_count(setup->empty_bins) > 0) { struct lp_bins *bins 
= lp_bins_dequeue(setup->empty_bins); if (!bins) break; -- cgit v1.2.3 From 663750d5564a225b4720f7ee8bea93ffb309fc88 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 13 Dec 2009 18:17:25 +0000 Subject: llvmpipe: rename bins to scene It was pretty confusing having an entity named "bin" and another named "bins", not least because sometimes there was a need to talk about >1 of the "bins" objects, which couldn't be pluralized any further... Scene is a term used in a bunch of places to talk about what a binner operates on, so it's a decent choice here. --- src/gallium/drivers/llvmpipe/SConscript | 10 +- src/gallium/drivers/llvmpipe/lp_bin.c | 310 ------------------------ src/gallium/drivers/llvmpipe/lp_bin.h | 275 --------------------- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 164 ------------- src/gallium/drivers/llvmpipe/lp_bin_queue.h | 55 ----- src/gallium/drivers/llvmpipe/lp_rast.c | 96 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 14 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 12 +- src/gallium/drivers/llvmpipe/lp_scene.c | 310 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 276 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene_queue.c | 164 +++++++++++++ src/gallium/drivers/llvmpipe/lp_scene_queue.h | 55 +++++ src/gallium/drivers/llvmpipe/lp_setup.c | 112 ++++----- src/gallium/drivers/llvmpipe/lp_setup_context.h | 12 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 36 +-- 15 files changed, 952 insertions(+), 949 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.h create mode 100644 src/gallium/drivers/llvmpipe/lp_scene.c create mode 100644 src/gallium/drivers/llvmpipe/lp_scene.h create mode 100644 src/gallium/drivers/llvmpipe/lp_scene_queue.c create mode 100644 src/gallium/drivers/llvmpipe/lp_scene_queue.h diff --git 
a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index bc725b65f6..f0b71ef3ee 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -19,8 +19,6 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ - 'lp_bin.c', - 'lp_bin_queue.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', @@ -35,9 +33,9 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_logic.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', - 'lp_bld_logic.c', 'lp_bld_swizzle.c', 'lp_bld_tgsi_soa.c', 'lp_bld_type.c', @@ -50,11 +48,13 @@ llvmpipe = env.ConvenienceLibrary( 'lp_jit.c', 'lp_prim_vbuf.c', 'lp_query.c', + 'lp_scene.c', + 'lp_scene_queue.c', + 'lp_screen.c', 'lp_setup.c', - 'lp_setup_tri.c', 'lp_setup_line.c', 'lp_setup_point.c', - 'lp_screen.c', + 'lp_setup_tri.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c deleted file mode 100644 index 703cdd2de5..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ /dev/null @@ -1,310 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_bin.h" - - -struct lp_bins * -lp_bins_create(void) -{ - struct lp_bins *bins = CALLOC_STRUCT(lp_bins); - if (bins) - lp_init_bins(bins); - return bins; -} - - -void -lp_bins_destroy(struct lp_bins *bins) -{ - lp_reset_bins(bins); - lp_free_bin_data(bins); - FREE(bins); -} - - -void -lp_init_bins(struct lp_bins *bins) -{ - unsigned i, j; - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); - } - - bins->data.head = - bins->data.tail = CALLOC_STRUCT(data_block); - - pipe_mutex_init(bins->mutex); -} - - -/** - * Set bins to empty state. 
- */ -void -lp_reset_bins(struct lp_bins *bins ) -{ - unsigned i, j; - - /* Free all but last binner command lists: - */ - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; - } - } - - /* Free all but last binned data block: - */ - { - struct data_block_list *list = &bins->data; - struct data_block *block, *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->used = 0; - } -} - - -/** - * Free all data associated with the given bin, but don't free(bins). - */ -void -lp_free_bin_data(struct lp_bins *bins) -{ - unsigned i, j; - - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - /* lp_reset_bins() should have been already called */ - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - - FREE(bins->data.head); - bins->data.head = NULL; - - pipe_mutex_destroy(bins->mutex); -} - - -void -lp_bin_set_framebuffer_size( struct lp_bins *bins, - unsigned width, unsigned height ) -{ - bins->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; - bins->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; -} - - -void -lp_bin_new_cmd_block( struct cmd_block_list *list ) -{ - struct cmd_block *block = MALLOC_STRUCT(cmd_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->count = 0; -} - - -void -lp_bin_new_data_block( struct data_block_list *list ) -{ - struct data_block *block = MALLOC_STRUCT(data_block); - 
list->tail->next = block; - list->tail = block; - block->next = NULL; - block->used = 0; -} - - -/** Return number of bytes used for bin data */ -unsigned -lp_bin_data_size( const struct lp_bins *bins ) -{ - unsigned size = 0; - const struct data_block *block; - for (block = bins->data.head; block; block = block->next) { - size += block->used; - } - return size; -} - - -/** Return number of bytes used for a tile bin */ -unsigned -lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ) -{ - struct cmd_bin *bin = lp_get_bin((struct lp_bins *) bins, x, y); - const struct cmd_block *cmd; - unsigned size = 0; - for (cmd = bin->commands.head; cmd; cmd = cmd->next) { - size += (cmd->count * - (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); - } - return size; -} - - -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. - */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. 
- */ -void -lp_bin_state_command( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_bin_command( bins, i, j, cmd, arg ); - } - } - } -} - - -/** advance curr_x,y to the next bin */ -static boolean -next_bin(struct lp_bins *bins) -{ - bins->curr_x++; - if (bins->curr_x >= bins->tiles_x) { - bins->curr_x = 0; - bins->curr_y++; - } - if (bins->curr_y >= bins->tiles_y) { - /* no more bins */ - return FALSE; - } - return TRUE; -} - - -void -lp_bin_iter_begin( struct lp_bins *bins ) -{ - bins->curr_x = bins->curr_y = -1; -} - - -/** - * Return point to next bin to be rendered. - * The lp_bins::curr_x and ::curr_y fields will be advanced. - * Multiple rendering threads will call this function to get a chunk - * of work (a bin) to work on. - */ -struct cmd_bin * -lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ) -{ - struct cmd_bin *bin = NULL; - - pipe_mutex_lock(bins->mutex); - - if (bins->curr_x < 0) { - /* first bin */ - bins->curr_x = 0; - bins->curr_y = 0; - } - else if (!next_bin(bins)) { - /* no more bins left */ - goto end; - } - - bin = lp_get_bin(bins, bins->curr_x, bins->curr_y); - *bin_x = bins->curr_x; - *bin_y = bins->curr_y; - -end: - /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ - pipe_mutex_unlock(bins->mutex); - return bin; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h deleted file mode 100644 index e763b16ffe..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ /dev/null @@ -1,275 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Binner data structures and bin-related functions. - * Note: the "setup" code is concerned with building bins while - * The "rast" code is concerned with consuming/executing bins. - */ - -#ifndef LP_BIN_H -#define LP_BIN_H - -#include "pipe/p_thread.h" -#include "lp_tile_soa.h" -#include "lp_rast.h" - - -/* We're limited to 2K by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. 
- */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) - - -#define CMD_BLOCK_MAX 128 -#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) - - - -/* switch to a non-pointer value for this: - */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, - unsigned thread_index, - const union lp_rast_cmd_arg ); - -struct cmd_block { - lp_rast_cmd cmd[CMD_BLOCK_MAX]; - union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; - unsigned count; - struct cmd_block *next; -}; - -struct data_block { - ubyte data[DATA_BLOCK_SIZE]; - unsigned used; - struct data_block *next; -}; - -struct cmd_block_list { - struct cmd_block *head; - struct cmd_block *tail; -}; - -/** - * For each screen tile we have one of these bins. - */ -struct cmd_bin { - struct cmd_block_list commands; -}; - - -/** - * This stores bulk data which is shared by all bins. - * Examples include triangle data and state data. The commands in - * the per-tile bins will point to chunks of data in this structure. - */ -struct data_block_list { - struct data_block *head; - struct data_block *tail; -}; - - -/** - * All bins and bin data are contained here. - * Per-bin data goes into the 'tile' bins. - * Shared bin data goes into the 'data' buffer. - * When there are multiple threads, will want to double-buffer the - * bin arrays: - */ -struct lp_bins { - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; - - /** the framebuffer to render the bins into */ - struct pipe_framebuffer_state fb; - - boolean write_depth; - - /** - * Number of active tiles in each dimension. 
- * This basically the framebuffer size divided by tile size - */ - unsigned tiles_x, tiles_y; - - int curr_x, curr_y; /**< for iterating over bins */ - pipe_mutex mutex; -}; - - - -struct lp_bins *lp_bins_create(void); - -void lp_bins_destroy(struct lp_bins *bins); - - -void lp_init_bins(struct lp_bins *bins); - -void lp_reset_bins(struct lp_bins *bins ); - -void lp_free_bin_data(struct lp_bins *bins); - -void lp_bin_set_framebuffer_size( struct lp_bins *bins, - unsigned width, unsigned height ); - -void lp_bin_new_data_block( struct data_block_list *list ); - -void lp_bin_new_cmd_block( struct cmd_block_list *list ); - -unsigned lp_bin_data_size( const struct lp_bins *bins ); - -unsigned lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ); - - -/** - * Allocate space for a command/data in the bin's data buffer. - * Grow the block list if needed. - */ -static INLINE void * -lp_bin_alloc( struct lp_bins *bins, unsigned size) -{ - struct data_block_list *list = &bins->data; - - if (list->tail->used + size > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - tail->used += size; - return data; - } -} - - -/** - * As above, but with specific alignment. - */ -static INLINE void * -lp_bin_alloc_aligned( struct lp_bins *bins, unsigned size, - unsigned alignment ) -{ - struct data_block_list *list = &bins->data; - - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; - tail->used += offset + size; - return data + offset; - } -} - - -/* Put back data if we decide not to use it, eg. culled triangles. 
- */ -static INLINE void -lp_bin_putback_data( struct lp_bins *bins, unsigned size) -{ - struct data_block_list *list = &bins->data; - assert(list->tail->used >= size); - list->tail->used -= size; -} - - -/** Return pointer to a particular tile's bin. */ -static INLINE struct cmd_bin * -lp_get_bin(struct lp_bins *bins, unsigned x, unsigned y) -{ - return &bins->tile[x][y]; -} - - - -/* Add a command to bin[x][y]. - */ -static INLINE void -lp_bin_command( struct lp_bins *bins, - unsigned x, unsigned y, - lp_rast_cmd cmd, - union lp_rast_cmd_arg arg ) -{ - struct cmd_bin *bin = lp_get_bin(bins, x, y); - struct cmd_block_list *list = &bin->commands; - - if (list->tail->count == CMD_BLOCK_MAX) { - lp_bin_new_cmd_block( list ); - } - - { - struct cmd_block *tail = list->tail; - unsigned i = tail->count; - tail->cmd[i] = cmd; - tail->arg[i] = arg; - tail->count++; - } -} - - -/* Add a command to all active bins. - */ -static INLINE void -lp_bin_everywhere( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) - for (j = 0; j < bins->tiles_y; j++) - lp_bin_command( bins, i, j, cmd, arg ); -} - - -void -lp_bin_state_command( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ); - - -static INLINE unsigned -lp_bin_get_num_bins( const struct lp_bins *bins ) -{ - return bins->tiles_x * bins->tiles_y; -} - - -void -lp_bin_iter_begin( struct lp_bins *bins ); - -struct cmd_bin * -lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ); - - -#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c deleted file mode 100644 index b39b46b72b..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ /dev/null @@ -1,164 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Bin queue. We'll use two queues. One contains "full" bins which - * are produced by the "setup" code. The other contains "empty" bins - * which are produced by the "rast" code when it finishes rendering a bin. - */ - - -#include "pipe/p_thread.h" -#include "util/u_memory.h" -#include "lp_bin_queue.h" - - - -#define MAX_BINS 4 - - -/** - * A queue of bins - */ -struct lp_bins_queue -{ - /** XXX might use a linked list here somedone, but the list will - * probably always be pretty short. 
- */ - struct lp_bins *bins[MAX_BINS]; - unsigned count; - - pipe_condvar count_change; - pipe_mutex mutex; -}; - - - -/** Allocate a new bins queue */ -struct lp_bins_queue * -lp_bins_queue_create(void) -{ - struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); - if (queue) { - pipe_condvar_init(queue->count_change); - pipe_mutex_init(queue->mutex); - } - return queue; -} - - -/** Delete a new bins queue */ -void -lp_bins_queue_destroy(struct lp_bins_queue *queue) -{ - pipe_condvar_destroy(queue->count_change); - pipe_mutex_destroy(queue->mutex); -} - - -/** Remove first lp_bins from head of queue */ -struct lp_bins * -lp_bins_dequeue(struct lp_bins_queue *queue) -{ - struct lp_bins *bins; - unsigned i; - - pipe_mutex_lock(queue->mutex); - while (queue->count == 0) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - - assert(queue->count >= 1); - - /* get head */ - bins = queue->bins[0]; - - /* shift entries */ - for (i = 0; i < queue->count - 1; i++) { - queue->bins[i] = queue->bins[i + 1]; - } - - queue->count--; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); - - return bins; -} - - -/** Add an lp_bins to tail of queue */ -void -lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) -{ - pipe_mutex_lock(queue->mutex); - - assert(queue->count < MAX_BINS); - - /* debug: check that bins is not already in the queue */ - if (0) { - unsigned i; - for (i = 0; i < queue->count; i++) { - assert(queue->bins[i] != bins); - } - } - - /* add to end */ - queue->bins[queue->count++] = bins; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); -} - - -/** Return number of entries in the queue */ -unsigned -lp_bins_queue_count(struct lp_bins_queue *queue) -{ - unsigned count; - pipe_mutex_lock(queue->mutex); - count = queue->count; - pipe_mutex_unlock(queue->mutex); - return count; -} - - -/** Wait until the queue has exactly 
'count' entries */ -void -lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned count) -{ - pipe_mutex_lock(queue->mutex); - while (queue->count != count) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - pipe_mutex_unlock(queue->mutex); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h deleted file mode 100644 index 1a0f8832db..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef LP_BIN_QUEUE -#define LP_BIN_QUEUE - -struct lp_bin_queue; -struct lp_bins; - - -struct lp_bins_queue * -lp_bins_queue_create(void); - -void -lp_bins_queue_destroy(struct lp_bins_queue *queue); - -struct lp_bins * -lp_bins_dequeue(struct lp_bins_queue *queue); - -void -lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); - -unsigned -lp_bins_queue_count(struct lp_bins_queue *queue); - -void -lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned size); - - -#endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3e7b3d7ab4..fd9cd67d85 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -30,7 +30,7 @@ #include "util/u_cpu_detect.h" #include "util/u_surface.h" -#include "lp_bin_queue.h" +#include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_state.h" @@ -38,7 +38,7 @@ #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" -#include "lp_bin.h" +#include "lp_scene.h" /** @@ -531,18 +531,18 @@ void lp_rast_fence( struct lp_rasterizer *rast, /** - * When all the threads are done rasterizing a bin, one thread will - * call this function to reset the bin and put it onto the empty queue. + * When all the threads are done rasterizing a scene, one thread will + * call this function to reset the scene and put it onto the empty queue. 
*/ static void -release_bins( struct lp_rasterizer *rast, - struct lp_bins *bins ) +release_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ) { - util_unreference_framebuffer_state( &bins->fb ); + util_unreference_framebuffer_state( &scene->fb ); - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins ); - rast->curr_bins = NULL; + lp_scene_reset( scene ); + lp_scene_enqueue( rast->empty_scenes, scene ); + rast->curr_scene = NULL; } @@ -576,22 +576,22 @@ rasterize_bin( struct lp_rasterizer *rast, /** - * Rasterize/execute all bins. + * Rasterize/execute all bins within a scene. * Called per thread. */ static void -rasterize_bins( struct lp_rasterizer *rast, +rasterize_scene( struct lp_rasterizer *rast, unsigned thread_index, - struct lp_bins *bins, + struct lp_scene *scene, bool write_depth ) { - /* loop over tile bins, rasterize each */ + /* loop over scene bins, rasterize each */ #if 0 { unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(scene, i, j); rasterize_bin( rast, thread_index, bin, i * TILE_SIZE, j * TILE_SIZE ); } @@ -602,8 +602,8 @@ rasterize_bins( struct lp_rasterizer *rast, struct cmd_bin *bin; int x, y; - assert(bins); - while ((bin = lp_bin_iter_next(bins, &x, &y))) { + assert(scene); + while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } @@ -615,8 +615,8 @@ rasterize_bins( struct lp_rasterizer *rast, * Called by setup module when it has something for us to render. 
*/ void -lp_rasterize_bins( struct lp_rasterizer *rast, - struct lp_bins *bins, +lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, const struct pipe_framebuffer_state *fb, bool write_depth ) { @@ -626,19 +626,19 @@ lp_rasterize_bins( struct lp_rasterizer *rast, if (debug) { unsigned x, y; - printf("rasterize bins:\n"); - printf(" data size: %u\n", lp_bin_data_size(bins)); - for (y = 0; y < bins->tiles_y; y++) { - for (x = 0; x < bins->tiles_x; x++) { + printf("rasterize scene:\n"); + printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { printf(" bin %u, %u size: %u\n", x, y, - lp_bin_cmd_size(bins, x, y)); + lp_scene_bin_size(scene, x, y)); } } } /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&bins->fb, fb); - bins->write_depth = write_depth; + util_copy_framebuffer_state(&scene->fb, fb); + scene->write_depth = write_depth; if (rast->num_threads == 0) { /* no threading */ @@ -647,10 +647,10 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->cbufs[0]!= NULL, fb->zsbuf != NULL && write_depth ); - lp_bin_iter_begin( bins ); - rasterize_bins( rast, 0, bins, write_depth ); + lp_scene_bin_iter_begin( scene ); + rasterize_scene( rast, 0, scene, write_depth ); - release_bins( rast, bins ); + release_scene( rast, scene ); lp_rast_end( rast ); } @@ -658,7 +658,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, /* threaded rendering! 
*/ unsigned i; - lp_bins_enqueue( rast->full_bins, bins ); + lp_scene_enqueue( rast->full_scenes, scene ); /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { @@ -697,18 +697,18 @@ thread_func( void *init_data ) if (task->thread_index == 0) { /* thread[0]: - * - get next set of bins to rasterize + * - get next scene to rasterize * - map the framebuffer surfaces */ const struct pipe_framebuffer_state *fb; boolean write_depth; - rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + rast->curr_scene = lp_scene_dequeue( rast->full_scenes ); - lp_bin_iter_begin( rast->curr_bins ); + lp_scene_bin_iter_begin( rast->curr_scene ); - fb = &rast->curr_bins->fb; - write_depth = rast->curr_bins->write_depth; + fb = &rast->curr_scene->fb; + write_depth = rast->curr_scene->write_depth; lp_rast_begin( rast, fb, fb->cbufs[0] != NULL, @@ -716,25 +716,27 @@ thread_func( void *init_data ) } /* Wait for all threads to get here so that threads[1+] don't - * get a null rast->curr_bins pointer. + * get a null rast->curr_scene pointer. */ pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_bins(rast, task->thread_index, - rast->curr_bins, rast->curr_bins->write_depth); + rasterize_scene(rast, + task->thread_index, + rast->curr_scene, + rast->curr_scene->write_depth); - /* wait for all threads to finish with this set of bins */ + /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); if (task->thread_index == 0) { /* thread[0]: - * - release the bins object + * - release the scene object * - unmap the framebuffer surfaces */ - release_bins( rast, rast->curr_bins ); + release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } @@ -773,11 +775,11 @@ create_rast_threads(struct lp_rasterizer *rast) /** * Create new lp_rasterizer. 
- * \param empty the queue to put empty bins on after we've finished + * \param empty the queue to put empty scenes on after we've finished * processing them. */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) +lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) { struct lp_rasterizer *rast; unsigned i; @@ -788,8 +790,8 @@ lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) rast->screen = screen; - rast->empty_bins = empty; - rast->full_bins = lp_bins_queue_create(); + rast->empty_scenes = empty; + rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index bd8f1ae1c9..2dd0193d8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -42,8 +42,8 @@ struct lp_rasterizer; -struct lp_bins; -struct lp_bins_queue; +struct lp_scene; +struct lp_scene_queue; struct lp_fence; struct cmd_bin; struct pipe_screen; @@ -130,16 +130,16 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, - struct lp_bins_queue *empty ); + struct lp_scene_queue *empty ); void lp_rast_destroy( struct lp_rasterizer * ); unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); -void lp_rasterize_bins( struct lp_rasterizer *rast, - struct lp_bins *bins, - const struct pipe_framebuffer_state *fb, - bool write_depth ); +void lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, + const struct pipe_framebuffer_state *fb, + bool write_depth ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index ba14fc3675..79a90f6610 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -110,13 +110,13 @@ struct 
lp_rasterizer char clear_stencil; } state; - /** The incoming queue of filled bins to rasterize */ - struct lp_bins_queue *full_bins; - /** The outgoing queue of emptied bins to return to setup modulee */ - struct lp_bins_queue *empty_bins; + /** The incoming queue of scenes ready to rasterize */ + struct lp_scene_queue *full_scenes; + /** The outgoing queue of processed scenes to return to setup modulee */ + struct lp_scene_queue *empty_scenes; - /** The bins currently being rasterized by the threads */ - struct lp_bins *curr_bins; + /** The scene currently being rasterized by the threads */ + struct lp_scene *curr_scene; /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c new file mode 100644 index 0000000000..774a1fecd7 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_scene.h" + + +struct lp_scene * +lp_scene_create(void) +{ + struct lp_scene *scene = CALLOC_STRUCT(lp_scene); + if (scene) + lp_scene_init(scene); + return scene; +} + + +void +lp_scene_destroy(struct lp_scene *scene) +{ + lp_scene_reset(scene); + lp_scene_free_bin_data(scene); + FREE(scene); +} + + +void +lp_scene_init(struct lp_scene *scene) +{ + unsigned i, j; + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + pipe_mutex_init(scene->mutex); +} + + +/** + * Set scene to empty state. 
+ */ +void +lp_scene_reset(struct lp_scene *scene ) +{ + unsigned i, j; + + /* Free all but last binner command lists: + */ + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; + } + } + + /* Free all but last binned data block: + */ + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->used = 0; + } +} + + +/** + * Free all data associated with the given bin, but don't free(scene). + */ +void +lp_scene_free_bin_data(struct lp_scene *scene) +{ + unsigned i, j; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + /* lp_reset_scene() should have been already called */ + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; + } + + FREE(scene->data.head); + scene->data.head = NULL; + + pipe_mutex_destroy(scene->mutex); +} + + +void +lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ) +{ + scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; +} + + +void +lp_bin_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} + + +void +lp_bin_new_data_block( struct data_block_list *list ) +{ + struct 
data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} + + +/** Return number of bytes used for all bin data within a scene */ +unsigned +lp_scene_data_size( const struct lp_scene *scene ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = scene->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + +/** Return number of bytes used for a single bin */ +unsigned +lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->commands.head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. + */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + + + +/** + * Put a state-change command into all bins. + * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. 
+ */ +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + lp_scene_bin_command( scene, i, j, cmd, arg ); + } + } + } +} + + +/** advance curr_x,y to the next bin */ +static boolean +next_bin(struct lp_scene *scene) +{ + scene->curr_x++; + if (scene->curr_x >= scene->tiles_x) { + scene->curr_x = 0; + scene->curr_y++; + } + if (scene->curr_y >= scene->tiles_y) { + /* no more bins */ + return FALSE; + } + return TRUE; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ) +{ + scene->curr_x = scene->curr_y = -1; +} + + +/** + * Return point to next bin to be rendered. + * The lp_scene::curr_x and ::curr_y fields will be advanced. + * Multiple rendering threads will call this function to get a chunk + * of work (a bin) to work on. 
+ */ +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ) +{ + struct cmd_bin *bin = NULL; + + pipe_mutex_lock(scene->mutex); + + if (scene->curr_x < 0) { + /* first bin */ + scene->curr_x = 0; + scene->curr_y = 0; + } + else if (!next_bin(scene)) { + /* no more bins left */ + goto end; + } + + bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y); + *bin_x = scene->curr_x; + *bin_y = scene->curr_y; + +end: + /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ + pipe_mutex_unlock(scene->mutex); + return bin; +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h new file mode 100644 index 0000000000..796fc516cc --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building scenes while + * The "rast" code is concerned with consuming/executing scenes. + */ + +#ifndef LP_SCENE_H +#define LP_SCENE_H + +#include "pipe/p_thread.h" +#include "lp_tile_soa.h" +#include "lp_rast.h" + + +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) + + +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + + + +/* switch to a non-pointer value for this: + */ +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +struct cmd_block { + lp_rast_cmd cmd[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; + unsigned count; + struct cmd_block *next; +}; + +struct data_block { + ubyte data[DATA_BLOCK_SIZE]; + unsigned used; + struct data_block *next; +}; + +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; +}; + + +/** + * This stores bulk data which is shared by all bins within a scene. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. 
+ */ +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + + +/** + * All bins and bin data are contained here. + * Per-bin data goes into the 'tile' bins. + * Shared data goes into the 'data' buffer. + * + * When there are multiple threads, will want to double-buffer between + * scenes: + */ +struct lp_scene { + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; + + /** the framebuffer to render the scene into */ + struct pipe_framebuffer_state fb; + + boolean write_depth; + + /** + * Number of active tiles in each dimension. + * This basically the framebuffer size divided by tile size + */ + unsigned tiles_x, tiles_y; + + int curr_x, curr_y; /**< for iterating over bins */ + pipe_mutex mutex; +}; + + + +struct lp_scene *lp_scene_create(void); + +void lp_scene_destroy(struct lp_scene *scene); + + +void lp_scene_init(struct lp_scene *scene); + +void lp_scene_reset(struct lp_scene *scene ); + +void lp_scene_free_bin_data(struct lp_scene *scene); + +void lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ); + +void lp_bin_new_data_block( struct data_block_list *list ); + +void lp_bin_new_cmd_block( struct cmd_block_list *list ); + +unsigned lp_scene_data_size( const struct lp_scene *scene ); + +unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); + + +/** + * Allocate space for a command/data in the bin's data buffer. + * Grow the block list if needed. + */ +static INLINE void * +lp_scene_alloc( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + + +/** + * As above, but with specific alignment. 
+ */ +static INLINE void * +lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, + unsigned alignment ) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + + +/* Put back data if we decide not to use it, eg. culled triangles. + */ +static INLINE void +lp_scene_putback_data( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + assert(list->tail->used >= size); + list->tail->used -= size; +} + + +/** Return pointer to a particular tile's bin. */ +static INLINE struct cmd_bin * +lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) +{ + return &scene->tile[x][y]; +} + + + +/* Add a command to bin[x][y]. + */ +static INLINE void +lp_scene_bin_command( struct lp_scene *scene, + unsigned x, unsigned y, + lp_rast_cmd cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + + if (list->tail->count == CMD_BLOCK_MAX) { + lp_bin_new_cmd_block( list ); + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + +/* Add a command to all active bins. 
+ */ +static INLINE void +lp_scene_bin_everywhere( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) + for (j = 0; j < scene->tiles_y; j++) + lp_scene_bin_command( scene, i, j, cmd, arg ); +} + + +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ); + + +static INLINE unsigned +lp_scene_get_num_bins( const struct lp_scene *scene ) +{ + return scene->tiles_x * scene->tiles_y; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ); + +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); + + +#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c new file mode 100644 index 0000000000..8d65a6a6fa --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Scene queue. We'll use two queues. One contains "full" scenes which + * are produced by the "setup" code. The other contains "empty" scenes + * which are produced by the "rast" code when it finishes rendering a scene. + */ + + +#include "pipe/p_thread.h" +#include "util/u_memory.h" +#include "lp_scene_queue.h" + + + +#define MAX_SCENE_QUEUE 4 + + +/** + * A queue of scenes + */ +struct lp_scene_queue +{ + /** XXX might use a linked list here somedone, but the list will + * probably always be pretty short. + */ + struct lp_scene *scenes[MAX_SCENE_QUEUE]; + unsigned count; + + pipe_condvar count_change; + pipe_mutex mutex; +}; + + + +/** Allocate a new scene queue */ +struct lp_scene_queue * +lp_scene_queue_create(void) +{ + struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue); + if (queue) { + pipe_condvar_init(queue->count_change); + pipe_mutex_init(queue->mutex); + } + return queue; +} + + +/** Delete a scene queue */ +void +lp_scene_queue_destroy(struct lp_scene_queue *queue) +{ + pipe_condvar_destroy(queue->count_change); + pipe_mutex_destroy(queue->mutex); +} + + +/** Remove first lp_scene from head of queue */ +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue) +{ + struct lp_scene *scene; + unsigned i; + + pipe_mutex_lock(queue->mutex); + while (queue->count == 0) { + pipe_condvar_wait(queue->count_change, queue->mutex); + } + + assert(queue->count >= 1); + + /* get head */ + scene = queue->scenes[0]; + + /* shift entries */ + for (i = 0; i < queue->count - 1; i++) { + queue->scenes[i] = queue->scenes[i + 1]; + } + + queue->count--; + + /* signal size change */ + 
pipe_condvar_signal(queue->count_change); + + pipe_mutex_unlock(queue->mutex); + + return scene; +} + + +/** Add an lp_scene to tail of queue */ +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene) +{ + pipe_mutex_lock(queue->mutex); + + assert(queue->count < MAX_SCENE_QUEUE); + + /* debug: check that scene is not already in the queue */ + if (0) { + unsigned i; + for (i = 0; i < queue->count; i++) { + assert(queue->scenes[i] != scene); + } + } + + /* add to end */ + queue->scenes[queue->count++] = scene; + + /* signal size change */ + pipe_condvar_signal(queue->count_change); + + pipe_mutex_unlock(queue->mutex); +} + + +/** Return number of entries in the queue */ +unsigned +lp_scene_queue_count(struct lp_scene_queue *queue) +{ + unsigned count; + pipe_mutex_lock(queue->mutex); + count = queue->count; + pipe_mutex_unlock(queue->mutex); + return count; +} + + +/** Wait until the queue has exactly 'count' entries */ +void +lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned count) +{ + pipe_mutex_lock(queue->mutex); + while (queue->count != count) { + pipe_condvar_wait(queue->count_change, queue->mutex); + } + pipe_mutex_unlock(queue->mutex); +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h new file mode 100644 index 0000000000..1bd475fa50 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_SCENE_QUEUE +#define LP_SCENE_QUEUE + +struct lp_scene_queue; +struct lp_scene; + + +struct lp_scene_queue * +lp_scene_queue_create(void); + +void +lp_scene_queue_destroy(struct lp_scene_queue *queue); + +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue); + +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *bins); + +unsigned +lp_scene_queue_count(struct lp_scene_queue *queue); + +void +lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned size); + + +#endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 0972c16784..76e0955237 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -36,8 +36,8 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -#include "lp_bin.h" -#include "lp_bin_queue.h" +#include "lp_scene.h" +#include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_state.h" @@ -47,26 +47,26 @@ /** XXX temporary value, temporary here */ -#define MAX_BINS 2 +#define MAX_SCENES 2 static void set_state( struct setup_context *, unsigned ); -struct lp_bins * -lp_setup_get_current_bins(struct setup_context *setup) +struct lp_scene * +lp_setup_get_current_scene(struct setup_context *setup) { - if (!setup->bins) { + if (!setup->scene) { /* wait for a free/empty bin */ - setup->bins = lp_bins_dequeue(setup->empty_bins); - if(0)lp_reset_bins( setup->bins ); /* XXX temporary? */ + setup->scene = lp_scene_dequeue(setup->empty_scenes); + if(0)lp_scene_reset( setup->scene ); /* XXX temporary? 
*/ if (setup->fb) { - lp_bin_set_framebuffer_size(setup->bins, + lp_scene_set_framebuffer_size(setup->scene, setup->fb->width, setup->fb->height); } } - return setup->bins; + return setup->scene; } @@ -111,7 +111,7 @@ static void reset_context( struct setup_context *setup ) setup->dirty = ~0; /* no current bin */ - setup->bins = NULL; + setup->scene = NULL; /* Reset some state: */ @@ -126,15 +126,15 @@ static void reset_context( struct setup_context *setup ) } -/** Rasterize all tile's bins */ +/** Rasterize all scene's bins */ static void -lp_setup_rasterize_bins( struct setup_context *setup, +lp_setup_rasterize_scene( struct setup_context *setup, boolean write_depth ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_rasterize_bins(setup->rast, - bins, + lp_rasterize_scene(setup->rast, + scene, setup->fb, write_depth); @@ -148,28 +148,28 @@ lp_setup_rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); else - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_load_color, lp_rast_arg_null() ); } if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); else - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_load_zstencil, lp_rast_arg_null() ); } @@ -189,7 +189,7 @@ execute_clears( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_bins( setup, TRUE ); + lp_setup_rasterize_scene( 
setup, TRUE ); } @@ -220,7 +220,7 @@ set_state( struct setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - lp_setup_rasterize_bins( setup, TRUE ); + lp_setup_rasterize_scene( setup, TRUE ); break; } @@ -242,7 +242,7 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -250,7 +250,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb = fb; - lp_bin_set_framebuffer_size(bins, setup->fb->width, setup->fb->height); + lp_scene_set_framebuffer_size(scene, setup->fb->width, setup->fb->height); } @@ -261,7 +261,7 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned i; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); @@ -280,19 +280,19 @@ lp_setup_clear( struct setup_context *setup, } if (setup->state == SETUP_ACTIVE) { - /* Add the clear to existing bins. In the unusual case where + /* Add the clear to existing scene. In the unusual case where * both color and depth-stencil are being cleared when there's * already been some rendering, we could discard the currently * binned scene and start again, but I don't see that as being * a common usage. 
*/ if (flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); } @@ -315,8 +315,8 @@ lp_setup_clear( struct setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); - const unsigned rank = lp_bin_get_num_bins( bins ); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ struct lp_fence *fence = lp_fence_create(rank); LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); @@ -324,9 +324,9 @@ lp_setup_fence( struct setup_context *setup ) set_state( setup, SETUP_ACTIVE ); /* insert the fence into all command bins */ - lp_bin_everywhere( bins, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + lp_scene_bin_everywhere( scene, + lp_rast_fence, + lp_rast_arg_fence(fence) ); return (struct pipe_fence_handle *) fence; } @@ -455,7 +455,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -465,7 +465,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(bins, 4 * 16, 16); + stored = lp_scene_alloc_aligned(scene, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -497,7 +497,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(bins, current_size); + stored = lp_scene_alloc(scene, current_size); if(stored) { memcpy(stored, 
current_data, @@ -522,12 +522,12 @@ lp_setup_update_shader_state( struct setup_context *setup ) memcmp(setup->fs.stored, &setup->fs.current, sizeof setup->fs.current) != 0) { - /* The fs state that's been stored in the bins is different from + /* The fs state that's been stored in the scene is different from * the new, current state. So allocate a new lp_rast_state object * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(bins, sizeof *stored); + (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -535,9 +535,9 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - lp_bin_state_command( bins, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); + lp_scene_bin_state_command( scene, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } @@ -587,12 +587,12 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - /* free the bins in the 'empty' queue */ - while (lp_bins_queue_count(setup->empty_bins) > 0) { - struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); - if (!bins) + /* free the scenes in the 'empty' queue */ + while (lp_scene_queue_count(setup->empty_scenes) > 0) { + struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes); + if (!scene) break; - lp_bins_destroy(bins); + lp_scene_destroy(scene); } lp_rast_destroy( setup->rast ); @@ -614,18 +614,18 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup) return NULL; - setup->empty_bins = lp_bins_queue_create(); - if (!setup->empty_bins) + setup->empty_scenes = lp_scene_queue_create(); + if (!setup->empty_scenes) goto fail; - setup->rast = lp_rast_create( screen, setup->empty_bins ); + setup->rast = lp_rast_create( screen, setup->empty_scenes ); if (!setup->rast) goto fail; - /* create some empty bins */ - 
for (i = 0; i < MAX_BINS; i++) { - struct lp_bins *bins = lp_bins_create(); - lp_bins_enqueue(setup->empty_bins, bins); + /* create some empty scenes */ + for (i = 0; i < MAX_SCENES; i++) { + struct lp_scene *scene = lp_scene_create(); + lp_scene_enqueue(setup->empty_scenes, scene); } setup->triangle = first_triangle; @@ -637,8 +637,8 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: - if (setup->empty_bins) - lp_bins_queue_destroy(setup->empty_bins); + if (setup->empty_scenes) + lp_scene_queue_destroy(setup->empty_scenes); FREE(setup); return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 584e37665b..180d9eca84 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -38,7 +38,7 @@ #include "lp_setup.h" #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ -#include "lp_bin.h" +#include "lp_scene.h" #define LP_SETUP_NEW_FS 0x01 @@ -46,7 +46,7 @@ #define LP_SETUP_NEW_BLEND_COLOR 0x04 -struct lp_bins_queue; +struct lp_scene_queue; /** @@ -59,8 +59,8 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins *bins; /**< current bins */ - struct lp_bins_queue *empty_bins; /**< queue of empty bins */ + struct lp_scene *scene; /**< current scene */ + struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ boolean ccw_is_frontface; unsigned cullmode; @@ -83,7 +83,7 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - const struct lp_rast_state *stored; /**< what's in the bins */ + const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ } fs; @@ -118,6 +118,6 @@ void lp_setup_choose_triangle( struct setup_context *setup ); void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); -struct lp_bins 
*lp_setup_get_current_bins(struct setup_context *setup); +struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 80617120b1..aeaf260af2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -178,7 +178,7 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned slot; /* Allocate space for the a0, dadx and dady arrays @@ -186,9 +186,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( bins, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( bins, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); } /* The internal position input is in slot zero: @@ -246,8 +246,8 @@ static inline int subpixel_snap( float a ) /** * Do basic setup for triangle rasterization and determine which - * framebuffer tiles are touched. Put the triangle in the bins for the - * tiles which we overlap. + * framebuffer tiles are touched. Put the triangle in the scene's + * bins for the tiles which we overlap. 
*/ static void do_triangle_ccw(struct setup_context *setup, @@ -264,8 +264,8 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_bins *bins = lp_setup_get_current_bins(setup); - struct lp_rast_triangle *tri = lp_bin_alloc( bins, sizeof *tri ); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -285,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? */ if (area <= 0) { - lp_bin_putback_data( bins, sizeof *tri ); + lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -297,7 +297,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( bins, sizeof *tri ); + lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -407,8 +407,8 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( bins, minx, miny, lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } else { @@ -466,17 +466,17 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( bins, x, y, - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); + lp_scene_bin_command( scene, x, y, + lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { in = 1; /* shade partial tile */ - lp_bin_command( bins, x, y, - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, x, y, + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: -- cgit v1.2.3 From b06b3a492519de85a53604e8b72a201d284584ea Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 11:49:23 -0700 Subject: llvmpipe: update file 
list in Makefile --- src/gallium/drivers/llvmpipe/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 7c4cf320b9..345326e33d 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,8 +6,8 @@ LIBNAME = llvmpipe CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_bin.c \ - lp_bin_queue.c \ + lp_scene.c \ + lp_scene_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ -- cgit v1.2.3 From 6e2a93739e2bbd857c62e1c3959a9032d591717a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 15:24:31 -0700 Subject: llvmpipe: fix broken lp_build_abs() --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index d27ef0de04..e7eb5f833b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -591,7 +591,8 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + unsigned long absMask = ~(1 << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); -- cgit v1.2.3 From 55879440d703bf9f5a4040d04a2f2cd024fa07c2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 15:27:35 -0700 Subject: llvmpipe: fix broken TGSI_OPCODE_FRC codegen --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c 
b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index adc81569ed..83ac25bb20 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -777,7 +777,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); tmp0 = lp_build_floor(&bld->base, src0); - tmp0 = lp_build_sub(&bld->base, tmp0, src0); + tmp0 = lp_build_sub(&bld->base, src0, tmp0); dst0[chan_index] = tmp0; } break; -- cgit v1.2.3 From 276b8523e82c36ec2def21d16fdf7f6a32a3bd37 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Dec 2009 07:58:37 -0700 Subject: llvmpipe: use 1ULL to be ready for 64-bit arithmetic someday --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index e7eb5f833b..f8260938f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -591,7 +591,7 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long absMask = ~(1 << (type.width - 1)); + unsigned long long absMask = ~(1ULL << (type.width - 1)); LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); -- cgit v1.2.3 From cdbcd96fdfe2c4d09e9b34cb083664d6b6e0558b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Dec 2009 15:39:48 -0700 Subject: llvmpipe: tighten up an assertion --- src/gallium/drivers/llvmpipe/lp_rast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index fd9cd67d85..ec87d907b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ 
b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -358,8 +358,8 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0; #endif - assert((x % 2) == 0); - assert((y % 2) == 0); + assert((x % 4) == 0); + assert((y % 4) == 0); ix = x % TILE_SIZE; iy = y % TILE_SIZE; -- cgit v1.2.3 From 2297bc9233be014b7b5aa037769209fbe9f6a66c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 12:32:45 -0700 Subject: llvmpipe: refactor lp_build_cmp() to use lp_build_compare() --- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 81 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_bld_logic.h | 8 +++ 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 9470f834fc..d094a040d6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -42,16 +42,16 @@ /** - * Build code to compare two values 'a' and 'b' using the given func. - * \parm func one of PIPE_FUNC_x + * Build code to compare two values 'a' and 'b' of 'type' using the given func. 
+ * \param func one of PIPE_FUNC_x */ LLVMValueRef -lp_build_cmp(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) { - const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -104,7 +104,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } if(swap) { @@ -117,11 +117,11 @@ lp_build_cmp(struct lp_build_context *bld, } args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); - res = lp_build_intrinsic(bld->builder, + res = lp_build_intrinsic(builder, "llvm.x86.sse.cmp.ps", vec_type, args, 3); - res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); return res; } else if(util_cpu_caps.has_sse2) { @@ -161,7 +161,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } /* There are no signed byte and unsigned word/dword comparison @@ -171,8 +171,8 @@ lp_build_cmp(struct lp_build_context *bld, ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(bld->builder, a, msb, ""); - b = LLVMBuildXor(bld->builder, b, msb, ""); + a = LLVMBuildXor(builder, a, msb, ""); + b = LLVMBuildXor(builder, b, msb, ""); } if(table[func].swap) { @@ -185,14 +185,14 @@ lp_build_cmp(struct lp_build_context *bld, } if(table[func].eq) - res = lp_build_intrinsic(bld->builder, pcmpeq, vec_type, args, 2); + res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); else if (table[func].gt) - res = lp_build_intrinsic(bld->builder, pcmpgt, vec_type, args, 2); + res = 
lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); else res = LLVMConstNull(vec_type); if(table[func].not) - res = LLVMBuildNot(bld->builder, res, ""); + res = LLVMBuildNot(builder, res, ""); return res; } @@ -228,28 +228,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildFCmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else debug_printf("%s: warning: using slow element-wise vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -276,28 +276,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildICmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", + 
debug_printf("%s: warning: using slow element-wise int vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -306,6 +306,21 @@ lp_build_cmp(struct lp_build_context *bld, } + +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + */ +LLVMValueRef +lp_build_cmp(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare(bld->builder, bld->type, func, a, b); +} + + LLVMValueRef lp_build_select(struct lp_build_context *bld, LLVMValueRef mask, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index a4ee7723b5..d687636656 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -46,6 +46,14 @@ struct lp_type type; struct lp_build_context; +LLVMValueRef +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b); + + /** * @param func is one of PIPE_FUNC_xxx */ -- cgit v1.2.3 From e288796c92bb7d75cd6dfee968804c6230ef38d7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 12:33:11 -0700 Subject: llvmpipe: added lp_build_int32_vec4_type() --- src/gallium/drivers/llvmpipe/lp_bld_type.c 
| 21 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_type.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 606243d6c5..e8cf7256c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,27 @@ lp_build_int_vec_type(struct lp_type type) } +/** + * Build int32[4] vector type + */ +LLVMTypeRef +lp_build_int32_vec4_type() +{ + struct lp_type t; + LLVMTypeRef type; + + memset(&t, 0, sizeof(t)); + t.floating = FALSE; /* floating point values */ + t.sign = TRUE; /* values are signed */ + t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + t.width = 32; /* 32-bit int */ + t.length = 4; /* 4 elements per vector */ + + type = lp_build_int_elem_type(t); + return LLVMVectorType(type, t.length); +} + + struct lp_type lp_int_type(struct lp_type type) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index ee5ca3483c..118fb33908 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -162,6 +162,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type); +LLVMTypeRef +lp_build_int32_vec4_type(); + + struct lp_type lp_int_type(struct lp_type type); -- cgit v1.2.3 From ab9438193083b7f9a3180cb9cea45e269131048a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 16:02:59 -0700 Subject: llvmpipe: do the final pixel in/out triangle test in the fragment shader The test to determine which of the pixels in a 2x2 quad lie inside the triangle is now done in the fragment shader rather than in the calling C code. This is a little faster but there are a few more things to do. Note that the step[] array elements are in a different order now. Rather than being in row-major order for the 4x4 grid, they're in "quad-major" order. The setup of the step arrays is a little more complicated now.
So is the coarse/intermediate tile test code, but some lookup tables help with that. Next steps: - early-cull 2x2 quads which are totally outside the triangle. - skip the in/out test for fully contained quads - make the in/out comparison code tighter/faster. --- src/gallium/drivers/llvmpipe/lp_jit.h | 9 +- src/gallium/drivers/llvmpipe/lp_rast.c | 76 +++------- src/gallium/drivers/llvmpipe/lp_rast.h | 11 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 11 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 222 +++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 49 +++--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 144 ++++++++++++++++-- 7 files changed, 302 insertions(+), 220 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 7eccb5da85..e8fb7d990f 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -114,9 +114,14 @@ typedef void const void *a0, const void *dadx, const void *dady, - const uint32_t *mask, void *color, - void *depth); + void *depth, + const int32_t c1, + const int32_t c2, + const int32_t c3, + const int32_t *step1, + const int32_t *step2, + const int32_t *step3); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ec87d907b8..b1bd27d340 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" @@ -279,6 +280,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { + /* Set c1,c2,c3 to large values so the in/out test always passes */ + const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = 
rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; @@ -296,7 +299,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, inputs, tile_x + x, tile_y + y, - mask); + c1, c2, c3); } @@ -308,58 +311,25 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - unsigned mask) + int32_t c1, int32_t c2, int32_t c3) { -#if 1 const struct lp_rast_state *state = rast->tasks[thread_index].current_state; struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; unsigned ix, iy; int block_offset; +#ifdef DEBUG assert(state); /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but - * the pixel shader wants to swizzle them into 4 2x2 quads. - * - * Additionally, the pixel shader wants masks as full dword ~0, - * while the rasterizer wants to pack per-pixel bits tightly. - */ -#if 0 - unsigned qx, qy; - for (qy = 0; qy < 2; ++qy) - for (qx = 0; qx < 2; ++qx) - for (iy = 0; iy < 2; ++iy) - for (ix = 0; ix < 2; ++ix) - masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0; -#else - masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0; - masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0; - masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0; - masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0; - masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0; - masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0; - masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0; - masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0; - - masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0; - masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0; - masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? 
~0 : 0; - masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0; - masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0; - masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0; - masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0; - masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0; -#endif - assert((x % 4) == 0); assert((y % 4) == 0); +#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -373,39 +343,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* depth buffer */ depth = tile->depth + block_offset; - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(masks, 16)); - +#ifdef DEBUG assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); + assert(lp_check_alignment(inputs->step[0], 16)); + assert(lp_check_alignment(inputs->step[1], 16)); + assert(lp_check_alignment(inputs->step[2], 16)); +#endif + /* run shader */ state->jit_function( &state->jit_context, x, y, inputs->a0, inputs->dadx, inputs->dady, - &masks[0][0][0][0], color, - depth); -#else - struct lp_rast_tile *tile = &rast->tile; - unsigned chan_index; - unsigned q, ix, iy; - - x %= TILE_SIZE; - y %= TILE_SIZE; - - /* mask */ - for (q = 0; q < 4; ++q) - for(iy = 0; iy < 2; ++iy) - for(ix = 0; ix < 2; ++ix) - if(masks[q] & (1 << (iy*2 + ix))) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff; - -#endif + depth, + c1, c2, c3, + inputs->step[0], inputs->step[1], inputs->step[2] + ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2dd0193d8d..46e22f69a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -80,6 +80,9 @@ struct lp_rast_shader_inputs { float (*a0)[4]; float (*dadx)[4]; float (*dady)[4]; + + /* edge/step info for 3 edges and 4x4 block of 
pixels */ + int ALIGN16_ATTRIB step[3][16]; }; @@ -117,14 +120,10 @@ struct lp_rast_triangle { int dx31; /* edge function values at minx,miny ?? */ - int c1; - int c2; - int c3; - - int step[3][16]; + int c1, c2, c3; /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; + struct lp_rast_shader_inputs ALIGN16_ATTRIB inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 79a90f6610..cd72d7e69d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -61,15 +61,6 @@ struct lp_rasterizer_task unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ - /* Pixel blocks produced during rasterization - */ - unsigned nr_blocks; - struct { - unsigned x; - unsigned y; - unsigned mask; - } blocks[256]; - const struct lp_rast_state *current_state; /** "back" pointer */ @@ -133,6 +124,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - unsigned masks); + int32_t c1, int32_t c2, int32_t c3); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 6c96010c52..9b1861223a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,6 +29,7 @@ * Rasterization for binned triangles within a tile */ +#include <limits.h> #include "util/u_math.h" #include "lp_debug.h" #include "lp_rast_priv.h" @@ -36,42 +37,89 @@ /** - * Add a 4x4 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Map an index in [0,15] to an x,y position, multiplied by 4. + * This is used to get the position of each subtile in a 4x4 + * grid of edge step values. 
+ */ +static const int pos_table4[16][2] = { + { 0, 0 }, + { 4, 0 }, + { 0, 4 }, + { 4, 4 }, + { 8, 0 }, + { 12, 0 }, + { 8, 4 }, + { 12, 4 }, + { 0, 8 }, + { 4, 8 }, + { 0, 12 }, + { 4, 12 }, + { 8, 8 }, + { 12, 8 }, + { 8, 12 }, + { 12, 12 } +}; + + +static const int pos_table16[16][2] = { + { 0, 0 }, + { 16, 0 }, + { 0, 16 }, + { 16, 16 }, + { 32, 0 }, + { 48, 0 }, + { 32, 16 }, + { 48, 16 }, + { 0, 32 }, + { 16, 32 }, + { 0, 48 }, + { 16, 48 }, + { 32, 32 }, + { 48, 32 }, + { 32, 48 }, + { 48, 48 } +}; + + +/** + * Shade all pixels in a 4x4 block. */ static void -block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) { - const unsigned i = rast_task->nr_blocks; - assert(x % 4 == 0); - assert(y % 4 == 0); - rast_task->blocks[i].x = x; - rast_task->blocks[i].y = y; - rast_task->blocks[i].mask = ~0; - rast_task->nr_blocks++; + /* Set c1,c2,c3 to large values so the in/out test always passes */ + const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + lp_rast_shade_quads(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y, + c1, c2, c3); } /** - * Add a 16x16 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Shade all pixels in a 16x16 block. */ static void -block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast_task, x + ix, y + iy); + block_full_4(rast_task, tri, x + ix, y + iy); } /** - * Evaluate each pixel in a 4x4 block to determine if it lies within - * the triangle's bounds. - * Generate a mask of in/out flags and add the block to the blocks list. + * Pass the 4x4 pixel block to the shader function. 
+ * Determination of which of the 16 pixels lies inside the triangle + * will be done as part of the fragment shader. */ static void do_block_4( struct lp_rasterizer_task *rast_task, @@ -81,28 +129,11 @@ do_block_4( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - int i; - unsigned mask = 0; - - assert(x % 4 == 0); - assert(y % 4 == 0); - - for (i = 0; i < 16; i++) { - int any_negative = ((c1 + tri->step[0][i]) | - (c2 + tri->step[1][i]) | - (c3 + tri->step[2][i])) >> 31; - mask |= (~any_negative) & (1 << i); - } - - /* As we do trivial reject already, masks should rarely be all zero: - */ - if (mask) { - const unsigned i = rast_task->nr_blocks; - rast_task->blocks[i].x = x; - rast_task->blocks[i].y = y; - rast_task->blocks[i].mask = mask; - rast_task->nr_blocks++; - } + lp_rast_shade_quads(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y, + c1, c2, c3); } @@ -118,40 +149,42 @@ do_block_16( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - int ix, iy, i = 0; + const int ei1 = tri->ei1 * 4; + const int ei2 = tri->ei2 * 4; + const int ei3 = tri->ei3 * 4; - int ei1 = tri->ei1 * 4; - int ei2 = tri->ei2 * 4; - int ei3 = tri->ei3 * 4; + const int eo1 = tri->eo1 * 4; + const int eo2 = tri->eo2 * 4; + const int eo3 = tri->eo3 * 4; - int eo1 = tri->eo1 * 4; - int eo2 = tri->eo2 * 4; - int eo3 = tri->eo3 * 4; + int i; assert(x % 16 == 0); assert(y % 16 == 0); - for (iy = 0; iy < 16; iy+=4) { - for (ix = 0; ix < 16; ix+=4, i++) { - int cx1 = c1 + (tri->step[0][i] * 4); - int cx2 = c2 + (tri->step[1][i] * 4); - int cx3 = c3 + (tri->step[2][i] * 4); - - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { + for (i = 0; i < 16; i++) { + int cx1 = c1 + (tri->inputs.step[0][i] * 4); + int cx2 = c2 + (tri->inputs.step[1][i] * 4); + int cx3 = c3 + (tri->inputs.step[2][i] * 4); + + if (cx1 + eo1 < 0 || + cx2 + eo2 
< 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + } + else { + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_4(rast_task, x+ix, y+iy); - } - else { + block_full_4(rast_task, tri, px, py); + } + else { /* the block is partially in/out of the triangle */ - do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); - } + do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + } } } } @@ -171,8 +204,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, int x = rast_task->x; int y = rast_task->y; - int ix, iy; - unsigned i = 0; + unsigned i; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; @@ -186,48 +218,36 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; - assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); - LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); - rast_task->nr_blocks = 0; - /* Walk over the tile to build a list of 4x4 pixel blocks which will * be filled/shaded. We do this at two granularities: 16x16 blocks * and then 4x4 blocks. 
*/ - for (iy = 0; iy < TILE_SIZE; iy += 16) { - for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { - int cx1 = c1 + (tri->step[0][i] * 16); - int cx2 = c2 + (tri->step[1][i] * 16); - int cx3 = c3 + (tri->step[2][i] * 16); - - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { + for (i = 0; i < 16; i++) { + int cx1 = c1 + (tri->inputs.step[0][i] * 16); + int cx2 = c2 + (tri->inputs.step[1][i] * 16); + int cx3 = c3 + (tri->inputs.step[2][i] * 16); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + } + else { + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_16(rast_task, x+ix, y+iy); - } - else { + block_full_16(rast_task, tri, px, py); + } + else { /* the block is partially in/out of the triangle */ - do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); - } + do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); + } } } - - assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); - - /* Shade the 4x4 pixel blocks */ - for (i = 0; i < rast_task->nr_blocks; i++) - lp_rast_shade_quads(rast, - thread_index, - &tri->inputs, - rast_task->blocks[i].x, - rast_task->blocks[i].y, - rast_task->blocks[i].mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index aeaf260af2..e15b987767 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -265,7 +265,7 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri ); + struct lp_rast_triangle *tri = 
lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -354,38 +354,29 @@ do_triangle_ccw(struct setup_context *setup, tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; { - int xstep1 = -tri->dy12; - int xstep2 = -tri->dy23; - int xstep3 = -tri->dy31; + const int xstep1 = -tri->dy12; + const int xstep2 = -tri->dy23; + const int xstep3 = -tri->dy31; - int ystep1 = tri->dx12; - int ystep2 = tri->dx23; - int ystep3 = tri->dx31; + const int ystep1 = tri->dx12; + const int ystep2 = tri->dx23; + const int ystep3 = tri->dx31; - int ix, iy; + int qx, qy, ix, iy; int i = 0; - int c1 = 0; - int c2 = 0; - int c3 = 0; - - for (iy = 0; iy < 4; iy++) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 4; ix++, i++) { - tri->step[0][i] = cx1; - tri->step[1][i] = cx2; - tri->step[2][i] = cx3; - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + for (qy = 0; qy < 2; qy++) { + for (qx = 0; qx < 2; qx++) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + int x = qx * 2 + ix; + int y = qy * 2 + iy; + tri->inputs.step[0][i] = x * xstep1 + y * ystep1; + tri->inputs.step[1][i] = x * xstep2 + y * ystep2; + tri->inputs.step[2][i] = x * xstep3 + y * ystep3; + } + } + } } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c0d5a70a55..4af37e365e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -175,8 +175,93 @@ generate_depth(LLVMBuilderRef builder, } +/** + * Generate the code to do inside/outside triangle testing for the + * four pixels in a 2x2 quad. This will set the four elements of the + * quad mask vector to 0 or ~0. 
+ * \param i which quad of the quad group to test, in [0,3] + */ +static void +generate_tri_edge_mask(LLVMBuilderRef builder, + unsigned i, + LLVMValueRef *mask, /* ivec4, out */ + LLVMValueRef c0, /* int32 */ + LLVMValueRef c1, /* int32 */ + LLVMValueRef c2, /* int32 */ + LLVMValueRef step0_ptr, /* ivec4 */ + LLVMValueRef step1_ptr, /* ivec4 */ + LLVMValueRef step2_ptr) /* ivec4 */ +{ + /* + c0_vec = splat(c0) + c1_vec = splat(c1) + c2_vec = splat(c2) + s0_vec = c0_vec + step0_ptr[i] + s1_vec = c1_vec + step1_ptr[i] + s2_vec = c2_vec + step2_ptr[i] + m0_vec = s0_vec > {0,0,0,0} + m1_vec = s1_vec > {0,0,0,0} + m2_vec = s2_vec > {0,0,0,0} + mask = m0_vec & m1_vec & m2_vec + */ + struct lp_type i32_type; + LLVMTypeRef i32vec4_type; + + LLVMValueRef index; + LLVMValueRef c0_vec, c1_vec, c2_vec; + LLVMValueRef step0_vec, step1_vec, step2_vec; + LLVMValueRef m0_vec, m1_vec, m2_vec; + LLVMValueRef s0_vec, s1_vec, s2_vec; + LLVMValueRef m; + + LLVMValueRef zeros; + + assert(i < 4); + + /* int32 vector type */ + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per vector */ + + i32vec4_type = lp_build_int32_vec4_type(); + + /* int32_vec4 zero = {0,0,0,0} */ + zeros = LLVMConstNull(i32vec4_type); + + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + + index = LLVMConstInt(LLVMInt32Type(), i, 0); + step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); + step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); + step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + + /** XXX with a little work, we could remove the add here 
and just + * compare c0_vec > step0_vec. + */ + s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, ""); + s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, ""); + s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, ""); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros); + + m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); + m = LLVMBuildAnd(builder, m, m2_vec, ""); + + lp_build_name(m, "m"); + + *mask = m; +} + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. + * \param i which quad in the tile, in range [0,3] */ static void generate_fs(struct llvmpipe_context *lp, @@ -190,7 +275,13 @@ generate_fs(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, - LLVMValueRef depth_ptr) + LLVMValueRef depth_ptr, + LLVMValueRef c0, + LLVMValueRef c1, + LLVMValueRef c2, + LLVMValueRef step0_ptr, + LLVMValueRef step1_ptr, + LLVMValueRef step2_ptr) { const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef elem_type; @@ -205,6 +296,8 @@ generate_fs(struct llvmpipe_context *lp, unsigned attrib; unsigned chan; + assert(i < 4); + elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); @@ -224,8 +317,13 @@ generate_fs(struct llvmpipe_context *lp, } lp_build_flow_scope_declare(flow, &z); + /* do triangle edge testing */ + generate_tri_edge_mask(builder, i, pmask, + c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + lp_build_mask_begin(&mask, flow, type, *pmask); + early_depth_test = key->depth.enabled && !key->alpha.enabled && @@ -376,17 +474,18 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; - LLVMTypeRef arg_types[9]; + LLVMTypeRef arg_types[14]; LLVMTypeRef 
func_type; + LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; - LLVMValueRef mask_ptr; LLVMValueRef color_ptr; LLVMValueRef depth_ptr; + LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; @@ -468,9 +567,17 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ - arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */ + arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMInt32Type(); /* c0 */ + arg_types[9] = LLVMInt32Type(); /* c1 */ + arg_types[10] = LLVMInt32Type(); /* c2 */ + /* Note: the step arrays are built as int32[16] but we interpret + * them here as int32_vec4[4]. 
+ */ + arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ + arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ + arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -486,9 +593,14 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); - mask_ptr = LLVMGetParam(variant->function, 6); - color_ptr = LLVMGetParam(variant->function, 7); - depth_ptr = LLVMGetParam(variant->function, 8); + color_ptr = LLVMGetParam(variant->function, 6); + depth_ptr = LLVMGetParam(variant->function, 7); + c0 = LLVMGetParam(variant->function, 8); + c1 = LLVMGetParam(variant->function, 9); + c2 = LLVMGetParam(variant->function, 10); + step0_ptr = LLVMGetParam(variant->function, 11); + step1_ptr = LLVMGetParam(variant->function, 12); + step2_ptr = LLVMGetParam(variant->function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -496,9 +608,14 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(mask_ptr, "mask"); lp_build_name(color_ptr, "color"); lp_build_name(depth_ptr, "depth"); + lp_build_name(c0, "c0"); + lp_build_name(c1, "c1"); + lp_build_name(c2, "c2"); + lp_build_name(step0_ptr, "step0"); + lp_build_name(step1_ptr, "step1"); + lp_build_name(step2_ptr, "step2"); /* * Function body @@ -526,7 +643,6 @@ generate_fragment(struct llvmpipe_context *lp, if(i != 0) lp_build_interp_soa_update(&interp, i); - fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, @@ -536,9 +652,11 @@ generate_fragment(struct llvmpipe_context *lp, i, &interp, sampler, - &fs_mask[i], + &fs_mask[i], /* output */ 
out_color, - depth_ptr_i); + depth_ptr_i, + c0, c1, c2, + step0_ptr, step1_ptr, step2_ptr); for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[chan][i] = out_color[chan]; -- cgit v1.2.3 From 7f2ba80025e4b534db72427a206e6a542fc2f520 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Dec 2009 11:29:37 +0000 Subject: llvmpipe: keep copy of framebuffer state in setup context Avoids crashes when first frame is rendered before window is mapped. Avoids potential issue where fb state is changed before setup context is flushed. --- src/gallium/drivers/llvmpipe/lp_setup.c | 24 ++++++++++++------------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 76e0955237..e361e5df63 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -36,6 +36,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" #include "lp_debug.h" @@ -61,10 +62,9 @@ lp_setup_get_current_scene(struct setup_context *setup) setup->scene = lp_scene_dequeue(setup->empty_scenes); if(0)lp_scene_reset( setup->scene ); /* XXX temporary? 
*/ - if (setup->fb) { - lp_scene_set_framebuffer_size(setup->scene, - setup->fb->width, setup->fb->height); - } + lp_scene_set_framebuffer_size(setup->scene, + setup->fb.width, + setup->fb.height); } return setup->scene; } @@ -134,9 +134,9 @@ lp_setup_rasterize_scene( struct setup_context *setup, struct lp_scene *scene = lp_setup_get_current_scene(setup); lp_rasterize_scene(setup->rast, - scene, - setup->fb, - write_depth); + scene, + &setup->fb, + write_depth); reset_context( setup ); @@ -152,7 +152,7 @@ begin_binning( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (setup->fb->cbufs[0]) { + if (setup->fb.cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, lp_rast_clear_color, @@ -163,7 +163,7 @@ begin_binning( struct setup_context *setup ) lp_rast_arg_null() ); } - if (setup->fb->zsbuf) { + if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, @@ -248,9 +248,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); - setup->fb = fb; + util_copy_framebuffer_state(&setup->fb, fb); - lp_scene_set_framebuffer_size(scene, setup->fb->width, setup->fb->height); + lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } @@ -274,7 +274,7 @@ lp_setup_clear( struct setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb->zsbuf->format, + util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 180d9eca84..f6604a8034 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -65,7 +65,7 @@ struct setup_context { boolean ccw_is_frontface; unsigned cullmode; - const struct pipe_framebuffer_state *fb; + struct 
pipe_framebuffer_state fb; struct { unsigned flags; diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 21565436eb..957e947fe0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -68,7 +68,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, draw_set_mrd(lp->draw, mrd); } - lp_setup_bind_framebuffer( lp->setup, fb ); + lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer ); lp->dirty |= LP_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From b9d33db0a4cb818154b713a27834f66025b14672 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 17:08:13 -0700 Subject: llvmpipe: improve the in/out test a little Instead of: s = c + step m = s > 0 Do: m = step > c (with negated c) --- src/gallium/drivers/llvmpipe/lp_rast.c | 3 +-- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 24 ++++++------------------ 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index b1bd27d340..015865a6d6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -281,11 +281,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; - const unsigned mask = ~0; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9b1861223a..d6e8d6d5ab 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -90,7 +90,7 @@ block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; lp_rast_shade_quads(rast_task->rast, rast_task->thread_index, &tri->inputs, @@ -133,7 +133,7 @@ do_block_4( struct lp_rasterizer_task *rast_task, rast_task->thread_index, &tri->inputs, x, y, - c1, c2, c3); + -c1, -c2, -c3); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 4af37e365e..15b175a2c4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -196,12 +196,9 @@ generate_tri_edge_mask(LLVMBuilderRef builder, c0_vec = splat(c0) c1_vec = splat(c1) c2_vec = splat(c2) - s0_vec = c0_vec + step0_ptr[i] - s1_vec = c1_vec + step1_ptr[i] - s2_vec = c2_vec + step2_ptr[i] - m0_vec = s0_vec > {0,0,0,0} - m1_vec = s1_vec > {0,0,0,0} - m2_vec = s2_vec > {0,0,0,0} + m0_vec = step0_ptr[i] > c0_vec + m1_vec = step1_ptr[i] > c1_vec + m2_vec = step2_ptr[i] > c2_vec mask = m0_vec & m1_vec & m2_vec */ struct lp_type i32_type; @@ -211,7 +208,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef c0_vec, c1_vec, c2_vec; LLVMValueRef step0_vec, step1_vec, step2_vec; LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef s0_vec, s1_vec, s2_vec; LLVMValueRef m; LLVMValueRef zeros; @@ -240,21 +236,13 @@ generate_tri_edge_mask(LLVMBuilderRef builder, step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - /** XXX with a little work, we could remove the add here and just - * compare c0_vec > step0_vec. 
- */ - s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, ""); - s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, ""); - s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, ""); - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); m = LLVMBuildAnd(builder, m, m2_vec, ""); - lp_build_name(m, "m"); - *mask = m; } -- cgit v1.2.3 From 808170a0ff6c3a51a1b69a54ed8045b2e0f7d0d1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 09:00:58 -0700 Subject: llvmpipe: replace INT_MIN/2 with INT_MIN Since changing the in/out test we can just use INT_MIN to be sure the comparison against the step values always passes. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 015865a6d6..24393c8e89 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -281,7 +281,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; + const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index d6e8d6d5ab..bc7397f50c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -90,7 +90,7 @@ block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; + const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; lp_rast_shade_quads(rast_task->rast, rast_task->thread_index, &tri->inputs, -- cgit v1.2.3 From 5771f3d483e882d9f5b6c5f3bdb3c39696623b66 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 10:52:50 -0700 Subject: llvmpipe: remove unused code, added comments, etc --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 15b175a2c4..7ed727dbbc 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -210,8 +210,6 
@@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef m0_vec, m1_vec, m2_vec; LLVMValueRef m; - LLVMValueRef zeros; - assert(i < 4); /* int32 vector type */ @@ -224,18 +222,27 @@ generate_tri_edge_mask(LLVMBuilderRef builder, i32vec4_type = lp_build_int32_vec4_type(); - /* int32_vec4 zero = {0,0,0,0} */ - zeros = LLVMConstNull(i32vec4_type); - + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. + */ c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + lp_build_name(step0_vec, "step0vec"); + lp_build_name(step1_vec, "step1vec"); + lp_build_name(step2_vec, "step2vec"); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); @@ -243,7 +250,13 @@ generate_tri_edge_mask(LLVMBuilderRef builder, m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); m = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_name(m, "inoutmaskvec"); + *mask = m; + + /* + * if mask = {0,0,0,0} skip quad + */ } @@ -309,6 +322,7 @@ generate_fs(struct llvmpipe_context *lp, generate_tri_edge_mask(builder, i, pmask, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); -- 
cgit v1.2.3 From 7d9b97703aba0c751e2cf10025859cbfe66074b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 14:22:43 -0700 Subject: llvmpipe: added function comments --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff8..420d062fc7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -402,6 +402,13 @@ lp_build_mask_check(struct lp_build_mask_context *mask) } +/** + * Begin a section of code which is predicated on a mask. + * \param mask the mask context, initialized here + * \param flow the flow context + * \param type the type of the mask + * \param value storage for the mask + */ void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, @@ -422,6 +429,11 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, } +/** + * Update boolean mask with given value (bitwise AND). + * Typically used to update the quad's pixel alive/killed mask + * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc. + */ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) @@ -432,6 +444,9 @@ lp_build_mask_update(struct lp_build_mask_context *mask, } +/** + * End section of code which is predicated on a mask. 
+ */ LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { -- cgit v1.2.3 From aeb6351a0961534e77771b962c296485b98b79fe Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 14:26:48 -0700 Subject: llvmpipe: fix upper/lower-case typo --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 420d062fc7..fe9c6941f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -46,7 +46,7 @@ * Enumeration of all possible flow constructs. */ enum lp_build_flow_construct_kind { - lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SCOPE, LP_BUILD_FLOW_SKIP, }; @@ -200,7 +200,7 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -241,7 +241,7 @@ lp_build_flow_scope_declare(struct lp_build_flow_context *flow, { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -263,7 +263,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; -- cgit v1.2.3 From 04794080d2553a53f34a48b1aec7d2a630396d92 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 18 Dec 2009 11:12:59 -0700 Subject: llvmpipe: change configs/linux-llvm from debug build to optimized build Basically equivalent to the SCons non-debug build now. 
--- configs/linux-llvm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/configs/linux-llvm b/configs/linux-llvm index 19b53cc546..ace4619b36 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -1,5 +1,5 @@ # -*-makefile-*- -# Configuration for Linux and LLVM with debugging info +# Configuration for Linux and LLVM with optimizations # Builds the llvmpipe gallium driver include $(TOP)/configs/linux @@ -11,8 +11,10 @@ CONFIG_NAME = linux-llvm # Add llvmpipe driver GALLIUM_DRIVERS_DIRS += llvmpipe -OPT_FLAGS = -g -ansi -pedantic -DEFINES += -DDEBUG -DDEBUG_MATH -DGALLIUM_LLVMPIPE -DHAVE_UDIS86 +OPT_FLAGS = -O3 -ansi -pedantic +ARCH_FLAGS = -m32 -mmmx -msse -msse2 -mstackrealign + +DEFINES += -DNDEBUG -DGALLIUM_LLVMPIPE -DHAVE_UDIS86 # override -std=c99 CFLAGS += -std=gnu99 -- cgit v1.2.3 From 6a7b6a530dd7740457d4bdd3b804c6eabff4e1b3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 18 Dec 2009 11:17:06 -0700 Subject: llvmpipe: added linux-llvm-debug configuration --- Makefile | 1 + configs/linux-llvm-debug | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 configs/linux-llvm-debug diff --git a/Makefile b/Makefile index 413a1b4372..934f85b5ab 100644 --- a/Makefile +++ b/Makefile @@ -126,6 +126,7 @@ linux-ia64-icc-static \ linux-icc \ linux-icc-static \ linux-llvm \ +linux-llvm-debug \ linux-osmesa \ linux-osmesa-static \ linux-osmesa16 \ diff --git a/configs/linux-llvm-debug b/configs/linux-llvm-debug new file mode 100644 index 0000000000..28bcfdb187 --- /dev/null +++ b/configs/linux-llvm-debug @@ -0,0 +1,12 @@ +# -*-makefile-*- +# Configuration for Linux and LLVM with debugging info +# Builds the llvmpipe gallium driver + +include $(TOP)/configs/linux-llvm + +CONFIG_NAME = linux-llvm-debug + +OPT_FLAGS = -g -ansi -pedantic + +DEFINES += -DDEBUG -UNDEBUG + -- cgit v1.2.3 From 5ce0380a0f585b9e1fb616b749f7fd18a8afada1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 16:44:43 +0000 Subject: 
llvmpipe: merge setup and draw vbuf submodules The setup tiling engine is now plugged directly into the draw module as a rendering backend. Removed a couple of layering violations such that the setup code no longer reaches out into the surrounding llvmpipe state or context. --- src/gallium/drivers/llvmpipe/Makefile | 2 +- src/gallium/drivers/llvmpipe/SConscript | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 18 +- src/gallium/drivers/llvmpipe/lp_context.h | 15 - src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 2 - src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 559 ------------------------ src/gallium/drivers/llvmpipe/lp_prim_vbuf.h | 38 -- src/gallium/drivers/llvmpipe/lp_setup.c | 111 ++--- src/gallium/drivers/llvmpipe/lp_setup.h | 34 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 29 +- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 520 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_derived.c | 223 +++------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +- 13 files changed, 698 insertions(+), 858 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_vbuf.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_vbuf.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_vbuf.c diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 345326e33d..6ec97046e1 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -35,13 +35,13 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ - lp_prim_vbuf.c \ lp_rast.c \ lp_rast_tri.c \ lp_setup.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ + lp_setup_vbuf.c \ lp_query.c \ lp_screen.c \ lp_state_blend.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f0b71ef3ee..ae4303bd24 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -46,7 +46,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fence.c', 
'lp_flush.c', 'lp_jit.c', - 'lp_prim_vbuf.c', 'lp_query.c', 'lp_scene.c', 'lp_scene_queue.c', @@ -55,6 +54,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', + 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 06aa032540..0457ccc8a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,14 +31,12 @@ */ #include "draw/draw_context.h" -#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" #include "lp_texture.h" @@ -179,23 +177,11 @@ llvmpipe_create( struct pipe_screen *screen ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create( screen ); + llvmpipe->setup = lp_setup_create( screen, + llvmpipe->draw ); if (!llvmpipe->setup) goto fail; - llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); - if (!llvmpipe->vbuf_backend) - goto fail; - - llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); - if (!llvmpipe->vbuf) - goto fail; - - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); - draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); - - - /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 17c6939ff5..b796148457 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -93,17 +93,6 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /* The 
reduced version of the primitive supplied by the state - * tracker. - */ - unsigned reduced_api_prim; - - /* The reduced primitive after unfilled triangles, wide-line - * decomposition, etc, are taken into account. This is the - * primitive actually rasterized. - */ - unsigned reduced_prim; - /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; @@ -113,10 +102,6 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - /** Draw module backend */ - struct vbuf_render *vbuf_backend; - struct draw_stage *vbuf; - unsigned tex_timestamp; boolean no_rast; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index b879b5e755..91fcbc01c6 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -70,8 +70,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, struct draw_context *draw = lp->draw; unsigned i; - lp->reduced_api_prim = u_reduced_prim(mode); - if (lp->dirty) llvmpipe_update_derived( lp ); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c deleted file mode 100644 index 925e6f8b3b..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ /dev/null @@ -1,559 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Interface between 'draw' module's output and the llvmpipe rasterizer/setup - * code. When the 'draw' module has finished filling a vertex buffer, the - * draw_arrays() functions below will be called. Loop over the vertices and - * call the point/line/tri setup functions. - * - * Authors - * Brian Paul - */ - - -#include "lp_context.h" -#include "lp_state.h" -#include "lp_prim_vbuf.h" -#include "lp_setup.h" -#include "draw/draw_context.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" -#include "util/u_prim.h" - - -#define LP_MAX_VBUF_INDEXES 1024 -#define LP_MAX_VBUF_SIZE 4096 - -typedef const float (*cptrf4)[4]; - -/** - * Subclass of vbuf_render. 
- */ -struct llvmpipe_vbuf_render -{ - struct vbuf_render base; - struct llvmpipe_context *llvmpipe; - struct setup_context *setup; - - uint prim; - uint vertex_size; - uint nr_vertices; - uint vertex_buffer_size; - void *vertex_buffer; -}; - - -/** cast wrapper */ -static struct llvmpipe_vbuf_render * -llvmpipe_vbuf_render(struct vbuf_render *vbr) -{ - return (struct llvmpipe_vbuf_render *) vbr; -} - - - - - - - -static const struct vertex_info * -lp_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); -} - - -static boolean -lp_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - - if (cvbr->vertex_buffer_size < size) { - align_free(cvbr->vertex_buffer); - cvbr->vertex_buffer = align_malloc(size, 16); - cvbr->vertex_buffer_size = size; - } - - cvbr->vertex_size = vertex_size; - cvbr->nr_vertices = nr_vertices; - - return cvbr->vertex_buffer != NULL; -} - -static void -lp_vbuf_release_vertices(struct vbuf_render *vbr) -{ - /* keep the old allocation for next time */ -} - -static void * -lp_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - -static void -lp_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - /* do nothing */ -} - - -static boolean -lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - - llvmpipe_update_derived( cvbr->llvmpipe ); - - cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); - cvbr->prim = prim; - return TRUE; - -} - - -static 
INLINE cptrf4 get_vert( const void *vertex_buffer, - int index, - int stride ) -{ - return (cptrf4)((char *)vertex_buffer + index * stride); -} - - -/** - * draw elements / indexed primitives - */ -static void -lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = cvbr->vertex_buffer; - struct setup_context *setup_ctx = cvbr->setup; - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - lp_setup_point( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - if (nr) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[nr-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, 
indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-(i&1)], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-2], stride), - get_vert(vertex_buffer, indices[i-(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; 
- - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - default: - assert(0); - } -} - - -/** - * This function is hit when the draw module is working in pass-through mode. - * It's up to us to convert the vertex array into point/line/tri prims. 
- */ -static void -lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - struct setup_context *setup_ctx = cvbr->setup; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = - (void *) get_vert(cvbr->vertex_buffer, start, stride); - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - lp_setup_point( setup_ctx, - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - if (nr) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, nr-1, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i++) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-1, stride), - get_vert(vertex_buffer, i-(i&1), stride), - get_vert(vertex_buffer, 
i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i++) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-2, stride), - get_vert(vertex_buffer, i-(i&1)-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, 
stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - default: - assert(0); - } -} - - - -static void -lp_vbuf_destroy(struct vbuf_render *vbr) -{ - FREE(vbr); -} - - -/** - * Create the post-transform vertex handler for the given context. - */ -struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *lp) -{ - struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - - assert(lp->draw); - assert(lp->setup); - - - cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; - cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - - cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; - cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; - cvbr->base.map_vertices = lp_vbuf_map_vertices; - cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; - cvbr->base.set_primitive = lp_vbuf_set_primitive; - cvbr->base.draw = lp_vbuf_draw; - cvbr->base.draw_arrays = lp_vbuf_draw_arrays; - cvbr->base.release_vertices = lp_vbuf_release_vertices; - cvbr->base.destroy = lp_vbuf_destroy; - - cvbr->llvmpipe = lp; - cvbr->setup = lp->setup; - - return &cvbr->base; -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h deleted file mode 100644 index 0676e2f42a..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ /dev/null @@ -1,38 +0,0 @@ 
-/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef LP_VBUF_H -#define LP_VBUF_H - - -struct llvmpipe_context; - -extern struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); - - -#endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e361e5df63..e2b21aed47 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,19 +39,22 @@ #include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_debug.h" -#include "lp_fence.h" -#include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_rast.h" #include "lp_setup_context.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" + /** XXX temporary value, temporary here */ #define MAX_SCENES 2 -static void set_state( struct setup_context *, unsigned ); +static void set_scene_state( struct setup_context *, unsigned ); struct lp_scene * @@ -76,7 +79,7 @@ first_triangle( struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_triangle( setup ); setup->triangle( setup, v0, v1, v2 ); } @@ -86,7 +89,7 @@ first_line( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_line( setup ); setup->line( setup, v0, v1 ); } @@ -95,7 +98,7 @@ static void first_point( struct setup_context *setup, const float (*v0)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_point( setup ); setup->point( setup, v0 ); } @@ -194,7 +197,7 @@ execute_clears( struct setup_context *setup ) static void -set_state( struct setup_context *setup, +set_scene_state( struct setup_context *setup, unsigned 
new_state ) { unsigned old_state = setup->state; @@ -234,7 +237,7 @@ lp_setup_flush( struct setup_context *setup, { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - set_state( setup, SETUP_FLUSHED ); + set_scene_state( setup, SETUP_FLUSHED ); } @@ -246,7 +249,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - set_state( setup, SETUP_FLUSHED ); + set_scene_state( setup, SETUP_FLUSHED ); util_copy_framebuffer_state(&setup->fb, fb); @@ -302,7 +305,7 @@ lp_setup_clear( struct setup_context *setup, * buffers which the app or state-tracker might issue * separately. */ - set_state( setup, SETUP_CLEARED ); + set_scene_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; } @@ -321,7 +324,7 @@ lp_setup_fence( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); /* insert the fence into all command bins */ lp_scene_bin_everywhere( scene, @@ -358,13 +361,13 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs( struct setup_context *setup, - struct lp_fragment_shader *fs ) +lp_setup_set_fs_function( struct setup_context *setup, + lp_jit_frag_func jit_function ) { - LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) fs); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); /* FIXME: reference count */ - setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; + setup->fs.current.jit_function = jit_function; setup->dirty |= LP_SETUP_NEW_FS; } @@ -406,6 +409,25 @@ lp_setup_set_blend_color( struct setup_context *setup, } } + +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ) +{ + setup->flatshade_first = flatshade_first; +} + + +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *vertex_info ) +{ + /* XXX: just silently holding onto the pointer: + */ + setup->vertex_info = vertex_info; +} + + void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture) @@ -452,8 +474,8 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } -static INLINE void -lp_setup_update_shader_state( struct setup_context *setup ) +void +lp_setup_update_state( struct setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -548,36 +570,6 @@ lp_setup_update_shader_state( struct setup_context *setup ) } -/* Stubs for lines & points for now: - */ -void -lp_setup_point(struct setup_context *setup, - const float (*v0)[4]) -{ - lp_setup_update_shader_state(setup); - setup->point( setup, v0 ); -} - -void -lp_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - lp_setup_update_shader_state(setup); - setup->line( setup, v0, v1 ); -} - -void -lp_setup_tri(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) -{ - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - - lp_setup_update_shader_state(setup); - setup->triangle( setup, v0, v1, v2 ); -} void @@ -602,11 +594,13 @@ lp_setup_destroy( struct setup_context *setup ) /** - * Create a new primitive tiling engine. Currently also creates a - * rasterizer to use with it. + * Create a new primitive tiling engine. Plug it into the backend of + * the draw module. Currently also creates a rasterizer to use with + * it. 
*/ struct setup_context * -lp_setup_create( struct pipe_screen *screen ) +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ) { unsigned i; struct setup_context *setup = CALLOC_STRUCT(setup_context); @@ -614,6 +608,8 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup) return NULL; + lp_setup_init_vbuf(setup); + setup->empty_scenes = lp_scene_queue_create(); if (!setup->empty_scenes) goto fail; @@ -622,6 +618,13 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; + setup->vbuf = draw_vbuf_stage(draw, &setup->base); + if (!setup->vbuf) + goto fail; + + draw_set_rasterize_stage(draw, setup->vbuf); + draw_set_render(draw, &setup->base); + /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { struct lp_scene *scene = lp_scene_create(); @@ -637,6 +640,12 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: + if (setup->rast) + lp_rast_destroy( setup->rast ); + + if (setup->vbuf) + ; + if (setup->empty_scenes) lp_scene_queue_destroy(setup->empty_scenes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5c606e86af..a6120fcbe4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -28,6 +28,10 @@ #define LP_SETUP_H #include "pipe/p_compiler.h" +#include "lp_jit.h" + +struct draw_context; +struct vertex_info; enum lp_interp { LP_INTERP_CONSTANT, @@ -58,7 +62,8 @@ struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( struct pipe_screen *screen ); +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ); void lp_setup_clear(struct setup_context *setup, @@ -71,22 +76,6 @@ struct pipe_fence_handle * lp_setup_fence( struct setup_context *setup ); -void -lp_setup_tri(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]); - -void -lp_setup_line(struct setup_context *setup, - const float (*v0)[4], - 
const float (*v1)[4]); - -void -lp_setup_point( struct setup_context *setup, - const float (*v0)[4] ); - - void lp_setup_flush( struct setup_context *setup, unsigned flags ); @@ -107,8 +96,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_fs( struct setup_context *setup, - struct lp_fragment_shader *fs ); +lp_setup_set_fs_function( struct setup_context *setup, + lp_jit_frag_func jit_function ); void lp_setup_set_fs_constants(struct setup_context *setup, @@ -131,6 +120,13 @@ boolean lp_setup_is_texture_referenced( struct setup_context *setup, const struct pipe_texture *texture ); +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ); + +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *info ); void lp_setup_destroy( struct setup_context *setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index f6604a8034..d2278a46e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -40,6 +40,7 @@ #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" +#include "draw/draw_vbuf.h" #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -53,15 +54,31 @@ struct lp_scene_queue; * Point/line/triangle setup context. * Note: "stored" below indicates data which is stored in the bins, * not arbitrary malloc'd memory. + * + * + * Subclass of vbuf_render, plugged directly into the draw module as + * the rendering backend. */ -struct setup_context { - +struct setup_context +{ + struct vbuf_render base; + + struct vertex_info *vertex_info; + uint prim; + uint vertex_size; + uint nr_vertices; + uint vertex_buffer_size; + void *vertex_buffer; + + /* Final pipeline stage for draw module. Draw module should + * create/install this itself now. 
+ */ + struct draw_stage *vbuf; struct lp_rasterizer *rast; - - struct lp_scene *scene; /**< current scene */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ + boolean flatshade_first; boolean ccw_is_frontface; unsigned cullmode; @@ -120,4 +137,8 @@ void lp_setup_choose_point( struct setup_context *setup ); struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); +void lp_setup_init_vbuf(struct setup_context *setup); + +void lp_setup_update_state( struct setup_context *setup ); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c new file mode 100644 index 0000000000..5cd4f354fd --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -0,0 +1,520 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Interface between 'draw' module's output and the llvmpipe rasterizer/setup + * code. When the 'draw' module has finished filling a vertex buffer, the + * draw_arrays() functions below will be called. Loop over the vertices and + * call the point/line/tri setup functions. + * + * Authors + * Brian Paul + */ + + +#include "lp_setup_context.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + + +#define LP_MAX_VBUF_INDEXES 1024 +#define LP_MAX_VBUF_SIZE 4096 + + + +/** cast wrapper */ +static struct setup_context * +setup_context(struct vbuf_render *vbr) +{ + return (struct setup_context *) vbr; +} + + + +static const struct vertex_info * +lp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct setup_context *setup = setup_context(vbr); + return setup->vertex_info; +} + + +static boolean +lp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct setup_context *setup = setup_context(vbr); + unsigned size = vertex_size * nr_vertices; + + if (setup->vertex_buffer_size < size) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = align_malloc(size, 16); + setup->vertex_buffer_size = size; + } + + setup->vertex_size = vertex_size; + setup->nr_vertices = nr_vertices; + + return setup->vertex_buffer != NULL; +} + +static void +lp_vbuf_release_vertices(struct vbuf_render *vbr) +{ + /* keep the old allocation for next time */ +} + +static void * +lp_vbuf_map_vertices(struct vbuf_render *vbr) +{ + struct setup_context *setup = setup_context(vbr); + return 
setup->vertex_buffer; +} + +static void +lp_vbuf_unmap_vertices(struct vbuf_render *vbr, + ushort min_index, + ushort max_index ) +{ + struct setup_context *setup = setup_context(vbr); + assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); + /* do nothing */ +} + + +static boolean +lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + setup_context(vbr)->prim = prim; + return TRUE; +} + +typedef const float (*const_float4_ptr)[4]; + +static INLINE const_float4_ptr get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (const_float4_ptr)((char *)vertex_buffer + index * stride); +} + +/** + * draw elements / indexed primitives + */ +static void +lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ + struct setup_context *setup = setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = setup->vertex_buffer; + unsigned i; + + lp_setup_update_state(setup); + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 3) { + 
setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + } + } + else { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i+(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-(i&1)], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + 
get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride)); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. Note that the first polygon vertex is passed as + * the last triangle vertex here. + * flatshade_first state makes no difference. + */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + default: + assert(0); + } +} + + +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. 
+ */ +static void +lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct setup_context *setup = setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(setup->vertex_buffer, start, stride); + unsigned i; + + lp_setup_update_state(setup); + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (setup->flatshade_first) { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-1, stride), + get_vert(vertex_buffer, i-(i&1), stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-2, stride), + 
get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, 
stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. Note that the first polygon vertex is passed as + * the last triangle vertex here. + * flatshade_first state makes no difference. + */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + default: + assert(0); + } +} + + + +static void +lp_vbuf_destroy(struct vbuf_render *vbr) +{ + lp_setup_destroy(setup_context(vbr)); +} + + +/** + * Create the post-transform vertex handler for the given context. + */ +void +lp_setup_init_vbuf(struct setup_context *setup) +{ + setup->base.max_indices = LP_MAX_VBUF_INDEXES; + setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; + + setup->base.get_vertex_info = lp_vbuf_get_vertex_info; + setup->base.allocate_vertices = lp_vbuf_allocate_vertices; + setup->base.map_vertices = lp_vbuf_map_vertices; + setup->base.unmap_vertices = lp_vbuf_unmap_vertices; + setup->base.set_primitive = lp_vbuf_set_primitive; + setup->base.draw = lp_vbuf_draw; + setup->base.draw_arrays = lp_vbuf_draw_arrays; + setup->base.release_vertices = lp_vbuf_release_vertices; + setup->base.destroy = lp_vbuf_destroy; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index a18efcc0e0..ab827045ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -37,17 +37,6 @@ #include "lp_state.h" -/** - * Mark the current vertex layout as "invalid". - * We'll validate the vertex layout later, when we start to actually - * render a point or line or tri. 
- */ -static void -invalidate_vertex_layout(struct llvmpipe_context *llvmpipe) -{ - llvmpipe->vertex_info.num_attribs = 0; -} - /** * The vertex info describes how to convert the post-transformed vertices @@ -57,150 +46,95 @@ invalidate_vertex_layout(struct llvmpipe_context *llvmpipe) * This function validates the vertex layout and returns a pointer to a * vertex_info object. */ -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) +static void +compute_vertex_info(struct llvmpipe_context *llvmpipe) { - struct vertex_info *vinfo = &llvmpipe->vertex_info; - - if (vinfo->num_attribs == 0) { - /* compute vertex layout now */ - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const enum interp_mode colorInterp - = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* Tell draw_vbuf to simply emit the whole post-xform vertex - * as-is. No longer any need to try and emit draw vertex_header - * info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + const struct lp_fragment_shader *lpfs = llvmpipe->fs; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); + uint i; + + /* Tell draw_vbuf to simply emit the whole post-xform vertex as-is. + * + * Not really sure if this is the best approach. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo_vbuf); - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. 
- */ - vinfo->num_attribs = 0; - for (i = 0; i < lpfs->info.num_inputs; i++) { - int src; - enum interp_mode interp; - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - interp = INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - interp = INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = INTERP_PERSPECTIVE; - break; - default: - assert(0); - interp = INTERP_LINEAR; - } + lp_setup_set_vertex_info(llvmpipe->setup, vinfo_vbuf); - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); - break; +/* + llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); +*/ - case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; + /* Now match FS inputs against emitted vertex data. It's also + * entirely possible to just have a fixed layout for FS input, + * determined by the fragment shader itself, and adjust the draw + * outputs to match that. 
+ */ + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; + for (i = 0; i < lpfs->info.num_inputs; i++) { - case TGSI_SEMANTIC_GENERIC: + /* This can be precomputed, except for flatshade: + */ + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); + inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + inputs[i].interp = LP_INTERP_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + /* Colors are linearly interpolated in the fragment shader + * even when flatshading is active. This just tells the + * setup module to use coefficients with ddx==0 and + * ddy==0. + */ + if (llvmpipe->rasterizer->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; break; default: - assert(0); - } - } - - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); - if (llvmpipe->psize_slot > 0) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, - llvmpipe->psize_slot); - } - - draw_compute_vertex_size(vinfo); - - { - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - - for (i = 0; i < lpfs->info.num_inputs; i++) { - switch (vinfo->attrib[i].interp_mode) { - case INTERP_CONSTANT: + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: inputs[i].interp = LP_INTERP_CONSTANT; break; - case INTERP_LINEAR: + case TGSI_INTERPOLATE_LINEAR: inputs[i].interp = LP_INTERP_LINEAR; break; - case INTERP_PERSPECTIVE: + case TGSI_INTERPOLATE_PERSPECTIVE: inputs[i].interp = LP_INTERP_PERSPECTIVE; break; - case INTERP_POS: - inputs[i].interp = LP_INTERP_POSITION; - break; default: assert(0); + 
break; } - - if (lpfs->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) - inputs[i].interp = LP_INTERP_FACING; - - inputs[i].src_index = vinfo->attrib[i].src_index; } - lp_setup_set_fs_inputs(llvmpipe->setup, inputs, lpfs->info.num_inputs); + /* Search for each input in current vs output: + */ + inputs[i].src_index = + draw_find_vs_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); } - } - return vinfo; + lp_setup_set_fs_inputs(llvmpipe->setup, + inputs, + lpfs->info.num_inputs); + } } -/** - * Called from vbuf module. - * - * Note that there's actually two different vertex layouts in llvmpipe. - * - * The normal one is computed in llvmpipe_get_vertex_info() above and is - * used by the point/line/tri "setup" code. - * - * The other one (this one) is only used by the vbuf module (which is - * not normally used by default but used in testing). For the vbuf module, - * we basically want to pass-through the draw module's vertex layout as-is. - * When the llvmpipe vbuf code begins drawing, the normal vertex layout - * will come into play again. - */ -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe) -{ - (void) llvmpipe_get_vertex_info(llvmpipe); - return &llvmpipe->vertex_info_vbuf; -} - /** * Recompute cliprect from scissor bounds, scissor enable and surface size. 
@@ -273,7 +207,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_VS)) - invalidate_vertex_layout( llvmpipe ); + compute_vertex_info( llvmpipe ); if (llvmpipe->dirty & (LP_NEW_SCISSOR | LP_NEW_RASTERIZER | @@ -287,36 +221,23 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA | - LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - llvmpipe_update_fs( llvmpipe ); - if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) - lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + lp_setup_set_blend_color(llvmpipe->setup, + &llvmpipe->blend_color); if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) - lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); + lp_setup_set_alpha_ref_value(llvmpipe->setup, + llvmpipe->depth_stencil->alpha.ref_value); if (llvmpipe->dirty & LP_NEW_CONSTANTS) - lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); + lp_setup_set_fs_constants(llvmpipe->setup, + llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); if (llvmpipe->dirty & LP_NEW_TEXTURE) - lp_setup_set_sampler_textures(llvmpipe->setup, llvmpipe->num_textures, llvmpipe->texture); + lp_setup_set_sampler_textures(llvmpipe->setup, + llvmpipe->num_textures, + llvmpipe->texture); llvmpipe->dirty = 0; } - -#if 0 -void llvmpipe_prepare(struct lp_setup_context *setup) -{ - struct llvmpipe_context *lp = setup->llvmpipe; - - if (lp->dirty) { - llvmpipe_update_derived(lp); - } - -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7ed727dbbc..3ad58415e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -891,5 +891,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; - 
lp_setup_set_fs(lp->setup, shader); + lp_setup_set_fs_function(lp->setup, + shader->current->jit_function); } -- cgit v1.2.3 From 72120292b981fd96e1127f927d7257255c65befd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 16:56:28 +0000 Subject: llvmpipe: restrict header visibility --- src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index d2a6ae21f5..d59d768139 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -47,7 +47,7 @@ #include "lp_bld_intr.h" #include "lp_bld_sample.h" #include "lp_bld_tgsi.h" -#include "lp_state.h" +#include "lp_jit.h" #include "lp_tex_sample.h" -- cgit v1.2.3 From c9240c4c8f67a06403b29992ab96b9a48f68b01d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:00:26 +0000 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_clear.c | 1 - src/gallium/drivers/llvmpipe/lp_context.h | 1 - src/gallium/drivers/llvmpipe/lp_flush.c | 1 - src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_state.h | 7 ------- src/gallium/drivers/llvmpipe/lp_state_derived.c | 16 ++++++++-------- 6 files changed, 8 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 4bae44e2ea..3e8c410925 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -36,7 +36,6 @@ #include "lp_clear.h" #include "lp_context.h" #include "lp_setup.h" -#include "lp_state.h" /** diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b796148457..194692045d 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -88,7 +88,6 @@ struct llvmpipe_context { /** Vertex 
format */ struct vertex_info vertex_info; - struct vertex_info vertex_info_vbuf; /** Which vertex shader output slot contains point size */ int psize_slot; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index e6519cb216..9405150c4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -35,7 +35,6 @@ #include "lp_flush.h" #include "lp_context.h" #include "lp_surface.h" -#include "lp_state.h" #include "lp_winsys.h" #include "lp_setup.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 24393c8e89..6772ff332b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -34,7 +34,6 @@ #include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" -#include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 64fe3600f5..6017dc553a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -215,11 +215,4 @@ void llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe); - -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe); - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index ab827045ed..cc7b09fd4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -43,29 +43,29 @@ * (simple float[][4]) used by the 'draw' module into vertices for * rasterization. * - * This function validates the vertex layout and returns a pointer to a - * vertex_info object. + * This function validates the vertex layout. 
*/ static void compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + struct vertex_info *vinfo = &llvmpipe->vertex_info; const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - /* Tell draw_vbuf to simply emit the whole post-xform vertex as-is. + /* Tell setup to tell the draw module to simply emit the whole + * post-xform vertex as-is. * * Not really sure if this is the best approach. */ - vinfo_vbuf->num_attribs = 0; + vinfo->num_attribs = 0; for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, i); } - draw_compute_vertex_size(vinfo_vbuf); + draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo_vbuf); + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); /* llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, -- cgit v1.2.3 From 601969c58729e26db6a33645a6a9ddb0b6ea2b92 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:13:28 +0000 Subject: llvmpipe: rename some functions --- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 5cd4f354fd..42c30af5ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -59,7 +59,7 @@ setup_context(struct vbuf_render *vbr) static const struct vertex_info * -lp_vbuf_get_vertex_info(struct vbuf_render *vbr) +lp_setup_get_vertex_info(struct vbuf_render *vbr) { struct setup_context *setup = setup_context(vbr); return setup->vertex_info; @@ -67,7 +67,7 @@ lp_vbuf_get_vertex_info(struct vbuf_render *vbr) static boolean -lp_vbuf_allocate_vertices(struct vbuf_render *vbr, +lp_setup_allocate_vertices(struct 
vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { struct setup_context *setup = setup_context(vbr); @@ -86,20 +86,20 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, } static void -lp_vbuf_release_vertices(struct vbuf_render *vbr) +lp_setup_release_vertices(struct vbuf_render *vbr) { /* keep the old allocation for next time */ } static void * -lp_vbuf_map_vertices(struct vbuf_render *vbr) +lp_setup_map_vertices(struct vbuf_render *vbr) { struct setup_context *setup = setup_context(vbr); return setup->vertex_buffer; } static void -lp_vbuf_unmap_vertices(struct vbuf_render *vbr, +lp_setup_unmap_vertices(struct vbuf_render *vbr, ushort min_index, ushort max_index ) { @@ -110,7 +110,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, static boolean -lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) { setup_context(vbr)->prim = prim; return TRUE; @@ -129,7 +129,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, * draw elements / indexed primitives */ static void -lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { struct setup_context *setup = setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); @@ -312,7 +312,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * It's up to us to convert the vertex array into point/line/tri prims. 
*/ static void -lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct setup_context *setup = setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); @@ -493,7 +493,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void -lp_vbuf_destroy(struct vbuf_render *vbr) +lp_setup_vbuf_destroy(struct vbuf_render *vbr) { lp_setup_destroy(setup_context(vbr)); } @@ -508,13 +508,13 @@ lp_setup_init_vbuf(struct setup_context *setup) setup->base.max_indices = LP_MAX_VBUF_INDEXES; setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - setup->base.get_vertex_info = lp_vbuf_get_vertex_info; - setup->base.allocate_vertices = lp_vbuf_allocate_vertices; - setup->base.map_vertices = lp_vbuf_map_vertices; - setup->base.unmap_vertices = lp_vbuf_unmap_vertices; - setup->base.set_primitive = lp_vbuf_set_primitive; - setup->base.draw = lp_vbuf_draw; - setup->base.draw_arrays = lp_vbuf_draw_arrays; - setup->base.release_vertices = lp_vbuf_release_vertices; - setup->base.destroy = lp_vbuf_destroy; + setup->base.get_vertex_info = lp_setup_get_vertex_info; + setup->base.allocate_vertices = lp_setup_allocate_vertices; + setup->base.map_vertices = lp_setup_map_vertices; + setup->base.unmap_vertices = lp_setup_unmap_vertices; + setup->base.set_primitive = lp_setup_set_primitive; + setup->base.draw = lp_setup_draw; + setup->base.draw_arrays = lp_setup_draw_arrays; + setup->base.release_vertices = lp_setup_release_vertices; + setup->base.destroy = lp_setup_vbuf_destroy; } -- cgit v1.2.3 From b08583da468ee186b43ea678f8d33fb7df3ab372 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:13:37 +0000 Subject: llvmpipe: fix double free --- src/gallium/drivers/llvmpipe/lp_context.c | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup.c | 3 ++- src/gallium/drivers/llvmpipe/lp_setup.h | 2 -- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 ++ 4 
files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 0457ccc8a9..696a9d5f6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -53,12 +53,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); uint i; + /* This will also destroy llvmpipe->setup: + */ if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); - if (llvmpipe->setup) - lp_setup_destroy( llvmpipe->setup ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e2b21aed47..1eb944a0de 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -571,7 +571,8 @@ lp_setup_update_state( struct setup_context *setup ) - +/* Only caller is lp_setup_vbuf_destroy() + */ void lp_setup_destroy( struct setup_context *setup ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index a6120fcbe4..bf12cb8527 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -128,7 +128,5 @@ void lp_setup_set_vertex_info( struct setup_context *setup, struct vertex_info *info ); -void -lp_setup_destroy( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index d2278a46e6..a1808fcd4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -141,4 +141,6 @@ void lp_setup_init_vbuf(struct setup_context *setup); void lp_setup_update_state( struct setup_context *setup ); +void lp_setup_destroy( struct setup_context *setup ); + #endif -- cgit v1.2.3 From 
4e8d67af574af480fdcca79e23836464c86b2dee Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 4 Jan 2010 15:22:58 -0700 Subject: llvmpipe: flow-control comments --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index fe9c6941f7..22b4310f06 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -145,6 +145,10 @@ lp_build_flow_destroy(struct lp_build_flow_context *flow) } +/** + * Begin/push a new flow control construct, such as a loop, skip block + * or variable scope. + */ static union lp_build_flow_construct_data * lp_build_flow_push(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -158,6 +162,10 @@ lp_build_flow_push(struct lp_build_flow_context *flow, } +/** + * Return the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_peek(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -174,6 +182,10 @@ lp_build_flow_peek(struct lp_build_flow_context *flow, } +/** + * End/pop the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_pop(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -213,11 +225,11 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) * * A variable is a named entity which can have different LLVMValueRef's at * different points of the program. 
This is relevant for control flow because - * when there are mutiple branches to a same location we need to replace + * when there are multiple branches to a same location we need to replace * the variable's value with a Phi function as explained in * http://en.wikipedia.org/wiki/Static_single_assignment_form . * - * We keep track of variables by keeping around a pointer to where their + * We keep track of variables by keeping around a pointer to where they're * current. * * There are a few cautions to observe: @@ -386,6 +398,9 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) } +/** + * Check if the mask predicate is zero. If so, jump to the end of the block. + */ static void lp_build_mask_check(struct lp_build_mask_context *mask) { -- cgit v1.2.3 From db7f9b053b7982810a00bc4d944bb3dfa2b9aac9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jan 2010 14:11:54 -0700 Subject: llvmpipe: more comments in flow builder code --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 22b4310f06..e42b653b67 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -310,6 +310,11 @@ lp_build_flow_insert_block(struct lp_build_flow_context *flow) return new_block; } + +/** + * Begin a "skip" block. Inside this block we can test a condition and + * skip to the end of the block if the condition is false. 
+ */ void lp_build_flow_skip_begin(struct lp_build_flow_context *flow) { @@ -321,13 +326,16 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) if(!skip) return; + /* create new basic block */ skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; if(!skip->num_variables) { skip->phi = NULL; return; } + /* Allocate a Phi node for each variable in this skip scope */ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); if(!skip->phi) { skip->num_variables = 0; @@ -337,6 +345,7 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, skip->block); + /* create a Phi node for each variable */ for(i = 0; i < skip->num_variables; ++i) skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); @@ -344,6 +353,10 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) } +/** + * Insert code to test a condition and branch to the end of the current + * skip block if the condition is true. 
+ */ void lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, LLVMValueRef cond) @@ -361,15 +374,17 @@ lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, new_block = lp_build_flow_insert_block(flow); + /* for each variable, update the Phi node with a (variable, block) pair */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); } + /* if cond is true, goto skip->block, else goto new_block */ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); LLVMPositionBuilderAtEnd(flow->builder, new_block); - } +} void @@ -385,12 +400,14 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) current_block = LLVMGetInsertBlock(flow->builder); + /* add (variable, block) tuples to the phi nodes */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); *flow->variables[i] = skip->phi[i]; } + /* goto block */ LLVMBuildBr(flow->builder, skip->block); LLVMPositionBuilderAtEnd(flow->builder, skip->block); @@ -407,12 +424,14 @@ lp_build_mask_check(struct lp_build_mask_context *mask) LLVMBuilderRef builder = mask->flow->builder; LLVMValueRef cond; + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); + /* if cond, goto end of block */ lp_build_flow_skip_cond_break(mask->flow, cond); } -- cgit v1.2.3 From baeb3a23513b9045c1a50bbe21124a4f8a9b6cd6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jan 2010 17:53:12 -0700 Subject: llvmpipe: checkpoint commit of new if/else/endif flow control Totally untested at this point. More work to do.
--- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 243 ++++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 27 ++++ 2 files changed, 269 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index e42b653b67..230edc6a5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -41,6 +41,8 @@ #define LP_BUILD_FLOW_MAX_VARIABLES 32 #define LP_BUILD_FLOW_MAX_DEPTH 32 +#define LP_BUILD_IF_MAX_VARIABLES 8 + /** * Enumeration of all possible flow constructs. @@ -48,6 +50,7 @@ enum lp_build_flow_construct_kind { LP_BUILD_FLOW_SCOPE, LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_IF }; @@ -73,7 +76,24 @@ struct lp_build_flow_skip /** Number of variables declared at the beginning */ unsigned num_variables; - LLVMValueRef *phi; + LLVMValueRef *phi; /**< array [num_variables] */ +}; + + +/** + * if/else/endif. + */ +struct lp_build_flow_if +{ + unsigned num_variables; + + /** phi variables in the true clause */ + LLVMValueRef true_variables[LP_BUILD_IF_MAX_VARIABLES]; + unsigned num_true_variables; + + /** phi variables in the false clause */ + LLVMValueRef false_variables[LP_BUILD_IF_MAX_VARIABLES]; + unsigned num_false_variables; }; @@ -84,6 +104,7 @@ union lp_build_flow_construct_data { struct lp_build_flow_scope scope; struct lp_build_flow_skip skip; + struct lp_build_flow_if ifthen; }; @@ -540,3 +561,223 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } + + +/* + Example of if/then/else building: + + int x; + if (cond) { + x = 1 + 2; + } + else { + x = 2 + 3; + } + + Is built with: + + flow = lp_build_flow_create(builder); + ... + + lp_build_flow_scope_declare(flow, "x"); + + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_if_phi_var(ctx, "x"); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_if_phi_var(ctx, "x"); + lp_build_endif(ctx); + + ... 
+ + flow = lp_build_flow_end(flow); + */ + + + +/** + * Begin an if/else/endif construct. + */ +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_if *ifthen; + + memset(ctx, 0, sizeof(*ctx)); + ctx->builder = builder; + ctx->flow = flow; + ctx->condition = condition; + ctx->entry_block = block; + + /* push/create new scope */ + ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + ifthen->num_variables = flow->num_variables; + ifthen->num_true_variables = 0; + ifthen->num_false_variables = 0; + + /* allocate the block for the if/true clause */ + ctx->true_block = LLVMAppendBasicBlock(function, "true block"); + /* XXX is this correct ??? */ + LLVMPositionBuilderAtEnd(builder, ctx->true_block); +} + + +/** + * Begin else-part of a conditional + */ +void +lp_build_else(struct lp_build_if_state *ctx) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_if *ifthen; + + ifthen = &lp_build_flow_peek(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + /* allocate the block for the else/false clause */ + ctx->false_block = LLVMAppendBasicBlock(function, "false block"); + /* XXX is this correct ??? */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); +} + + +/** + * End a conditional. + * This involves building a "merge" block at the endif which + * contains the phi instructions. 
+ */ +void +lp_build_endif(struct lp_build_if_state *ctx) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMBasicBlockRef merge_block = LLVMAppendBasicBlock(function, "endif block"); + LLVMValueRef phi[LP_BUILD_FLOW_MAX_VARIABLES]; + struct lp_build_flow_if *ifthen; + unsigned i; + + /* build the endif/merge block now */ + /* XXX this is probably wrong */ + LLVMPositionBuilderAtEnd(ctx->builder, merge_block); + + ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + memset(phi, 0, sizeof(phi)); + + /* build phi nodes for any variables which were declared inside if part */ + + for (i = 0; i < ifthen->num_variables; i++) { + LLVMValueRef *var = ctx->flow->variables[i]; + const char *name = LLVMGetValueName(*var); + unsigned j; + + /* search true-clause variables list for 'name' */ + for (j = 0; j < ifthen->num_true_variables; j++) { + LLVMValueRef v = ifthen->true_variables[j]; + if (strcmp(LLVMGetValueName(v), name) == 0) { + /* add phi */ + if (!phi[i]) + phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); + LLVMAddIncoming(phi[i], &v, &ctx->true_block, 1); + } + } + + /* search false-clause variables list for 'name' */ + for (j = 0; j < ifthen->num_false_variables; j++) { + LLVMValueRef v = ifthen->false_variables[j]; + if (strcmp(LLVMGetValueName(v), name) == 0) { + /* add phi */ + if (!phi[i]) + phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); + LLVMAddIncoming(phi[i], &v, &ctx->false_block, 1); + } + } + + /* "return" new phi variable to calling code */ + if (phi[i]) + *var = phi[i]; + } + + /*** + *** Insert the various branch instructions here. + *** XXX need to verify all the builder/block positioning is correct. 
+ ***/ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); + + if (ctx->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(ctx->builder, ctx->condition, + ctx->true_block, ctx->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(ctx->builder, ctx->condition, + ctx->true_block, merge_block); + } + + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); + LLVMBuildBr(ctx->builder, merge_block); + if (ctx->false_block) { + /* Append an unconditional Br(anch) instruction on the false_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); + LLVMBuildBr(ctx->builder, merge_block); + } + + + /* Finish-up: continue building at end of the merge_block */ + /* XXX is this right? */ + LLVMPositionBuilderAtEnd(ctx->builder, merge_block); +} + + +/** + * Declare a variable that needs to be merged with another variable + * via a phi function. + * This function must be called after lp_build_if() and lp_build_endif(). 
+ */ +void +lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var) +{ + struct lp_build_flow_if *ifthen; + const char *name; + + name = LLVMGetValueName(var); + assert(name && "variable requires a name"); + + /* make sure the var existed before the if/then/else */ + { + boolean found = FALSE; + uint i; + for (i = 0; i < ctx->flow->num_variables; i++) { + LLVMValueRef *var = ctx->flow->variables[i]; + if (strcmp(LLVMGetValueName(*var), name) == 0) { + found = TRUE; + break; + } + } + assert(found); + } + + ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + + if (ctx->false_block) { + ifthen->false_variables[ifthen->num_false_variables++] = var; + } + else { + assert(ctx->true_block); + ifthen->true_variables[ifthen->num_true_variables++] = var; + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index e61999ff06..1f294b8a49 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -126,4 +126,31 @@ lp_build_loop_end(LLVMBuilderRef builder, + +struct lp_build_if_state +{ + LLVMBuilderRef builder; + struct lp_build_flow_context *flow; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block; +}; + + +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition); + +void +lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var); + +void +lp_build_else(struct lp_build_if_state *ctx); + +void +lp_build_endif(struct lp_build_if_state *ctx); + + + #endif /* !LP_BLD_FLOW_H */ -- cgit v1.2.3 From 9cdf6f025b2ed55cfb13dd09f870f01d0c7947d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 18 Dec 2009 22:40:33 +0000 Subject: scons: Set the default windows platform to be windows userspace. I thought I had done this ages ago. 
--- common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.py b/common.py index 3b6bf52c03..101fc558f4 100644 --- a/common.py +++ b/common.py @@ -12,7 +12,7 @@ import platform as _platform _platform_map = { 'linux2': 'linux', - 'win32': 'winddk', + 'win32': 'windows', } default_platform = sys.platform -- cgit v1.2.3 From 70b8d59792a814a5a81b86d57016314754d91593 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:01:00 -0700 Subject: llvmpipe: checkpoint if/else/endif constructs work The LLVM IR looks correct now. Basic blocks are where they're supposed to be and the Phi functions have the right (var,block) information. --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 220 +++++++++++++---------------- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 5 +- 2 files changed, 96 insertions(+), 129 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 230edc6a5c..a347cedf03 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -87,13 +87,7 @@ struct lp_build_flow_if { unsigned num_variables; - /** phi variables in the true clause */ - LLVMValueRef true_variables[LP_BUILD_IF_MAX_VARIABLES]; - unsigned num_true_variables; - - /** phi variables in the false clause */ - LLVMValueRef false_variables[LP_BUILD_IF_MAX_VARIABLES]; - unsigned num_false_variables; + LLVMValueRef *phi; /**< array [num_variables] */ }; @@ -310,28 +304,43 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) } +/** + * Note: this function has no dependencies on the flow code and could + * be used elsewhere. 
+ */ static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; LLVMBasicBlockRef next_block; LLVMBasicBlockRef new_block; - current_block = LLVMGetInsertBlock(flow->builder); + /* get current basic block */ + current_block = LLVMGetInsertBlock(builder); + /* check if there's another block after this one */ next_block = LLVMGetNextBasicBlock(current_block); - if(next_block) { - new_block = LLVMInsertBasicBlock(next_block, ""); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlock(next_block, name); } else { + /* append new block after current block */ LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlock(function, ""); + new_block = LLVMAppendBasicBlock(function, name); } return new_block; } +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + return lp_build_insert_new_block(flow->builder, ""); +} + + /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. @@ -576,22 +585,24 @@ lp_build_loop_end(LLVMBuilderRef builder, Is built with: + LLVMValueRef x = LLVMGetUndef(); // or something else + flow = lp_build_flow_create(builder); - ... - lp_build_flow_scope_declare(flow, "x"); + lp_build_flow_scope_begin(flow); + + // x needs a phi node + lp_build_flow_scope_declare(flow, &x); - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_if_phi_var(ctx, "x"); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_if_phi_var(ctx, "x"); - lp_build_endif(ctx); + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_endif(ctx); - ... 
+ lp_build_flow_scope_end(flow); - flow = lp_build_flow_end(flow); + lp_build_flow_destroy(flow); */ @@ -606,8 +617,8 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMValueRef condition) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); struct lp_build_flow_if *ifthen; + unsigned i; memset(ctx, 0, sizeof(*ctx)); ctx->builder = builder; @@ -620,12 +631,27 @@ lp_build_if(struct lp_build_if_state *ctx, assert(ifthen); ifthen->num_variables = flow->num_variables; - ifthen->num_true_variables = 0; - ifthen->num_false_variables = 0; - /* allocate the block for the if/true clause */ - ctx->true_block = LLVMAppendBasicBlock(function, "true block"); - /* XXX is this correct ??? */ + /* create a Phi node for each variable in this flow scope */ + ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); + if (!ifthen->phi) { + ifthen->num_variables = 0; + return; + } + + /* create endif/merge basic block for the phi functions */ + ctx->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ctx->merge_block); + + /* create a phi node for each variable */ + for (i = 0; i < flow->num_variables; i++) + ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + + /* create/insert true_block before merge_block */ + ctx->true_block = LLVMInsertBasicBlock(ctx->merge_block, "if-true-block"); + + /* successive code goes into the true block */ LLVMPositionBuilderAtEnd(builder, ctx->true_block); } @@ -636,86 +662,71 @@ lp_build_if(struct lp_build_if_state *ctx, void lp_build_else(struct lp_build_if_state *ctx) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + unsigned i; - ifthen = &lp_build_flow_peek(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + ifthen = &lp_build_flow_peek(flow, 
LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - /* allocate the block for the else/false clause */ - ctx->false_block = LLVMAppendBasicBlock(function, "false block"); - /* XXX is this correct ??? */ + /* for each variable, update the Phi node with a (variable, block) pair */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + } + + /* create/insert false_block before the merge block */ + ctx->false_block = LLVMInsertBasicBlock(ctx->merge_block, "if-false-block"); + + /* successive code goes into the else block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); } /** * End a conditional. - * This involves building a "merge" block at the endif which - * contains the phi instructions. */ void lp_build_endif(struct lp_build_if_state *ctx) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMBasicBlockRef merge_block = LLVMAppendBasicBlock(function, "endif block"); - LLVMValueRef phi[LP_BUILD_FLOW_MAX_VARIABLES]; + struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; unsigned i; - /* build the endif/merge block now */ - /* XXX this is probably wrong */ - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); - - ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - memset(phi, 0, sizeof(phi)); - - /* build phi nodes for any variables which were declared inside if part */ + if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + /* for each variable, update the Phi node with a (variable, block) pair */ + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->false_block, 1); + } + } + else { + /* no else 
clause */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + for (i = 0; i < flow->num_variables; i++) { + LLVMValueRef undef; - for (i = 0; i < ifthen->num_variables; i++) { - LLVMValueRef *var = ctx->flow->variables[i]; - const char *name = LLVMGetValueName(*var); - unsigned j; + assert(*flow->variables[i]); - /* search true-clause variables list for 'name' */ - for (j = 0; j < ifthen->num_true_variables; j++) { - LLVMValueRef v = ifthen->true_variables[j]; - if (strcmp(LLVMGetValueName(v), name) == 0) { - /* add phi */ - if (!phi[i]) - phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); - LLVMAddIncoming(phi[i], &v, &ctx->true_block, 1); - } - } + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); - /* search false-clause variables list for 'name' */ - for (j = 0; j < ifthen->num_false_variables; j++) { - LLVMValueRef v = ifthen->false_variables[j]; - if (strcmp(LLVMGetValueName(v), name) == 0) { - /* add phi */ - if (!phi[i]) - phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); - LLVMAddIncoming(phi[i], &v, &ctx->false_block, 1); - } + /* undef value from the block preceeding the 'if' */ + undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); + LLVMAddIncoming(ifthen->phi[i], &undef, &ctx->entry_block, 1); } - - /* "return" new phi variable to calling code */ - if (phi[i]) - *var = phi[i]; } /*** - *** Insert the various branch instructions here. - *** XXX need to verify all the builder/block positioning is correct. + *** Now patch in the various branch instructions. 
***/ /* Insert the conditional branch instruction at the end of entry_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); - if (ctx->false_block) { /* we have an else clause */ LLVMBuildCondBr(ctx->builder, ctx->condition, @@ -724,60 +735,19 @@ lp_build_endif(struct lp_build_if_state *ctx) else { /* no else clause */ LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, merge_block); + ctx->true_block, ctx->merge_block); } /* Append an unconditional Br(anch) instruction on the true_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); - LLVMBuildBr(ctx->builder, merge_block); + LLVMBuildBr(ctx->builder, ctx->merge_block); if (ctx->false_block) { /* Append an unconditional Br(anch) instruction on the false_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); - LLVMBuildBr(ctx->builder, merge_block); - } - - - /* Finish-up: continue building at end of the merge_block */ - /* XXX is this right? */ - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); -} - - -/** - * Declare a variable that needs to be merged with another variable - * via a phi function. - * This function must be called after lp_build_if() and lp_build_endif(). 
- */ -void -lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var) -{ - struct lp_build_flow_if *ifthen; - const char *name; - - name = LLVMGetValueName(var); - assert(name && "variable requires a name"); - - /* make sure the var existed before the if/then/else */ - { - boolean found = FALSE; - uint i; - for (i = 0; i < ctx->flow->num_variables; i++) { - LLVMValueRef *var = ctx->flow->variables[i]; - if (strcmp(LLVMGetValueName(*var), name) == 0) { - found = TRUE; - break; - } - } - assert(found); + LLVMBuildBr(ctx->builder, ctx->merge_block); } - ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; - if (ctx->false_block) { - ifthen->false_variables[ifthen->num_false_variables++] = var; - } - else { - assert(ctx->true_block); - ifthen->true_variables[ifthen->num_true_variables++] = var; - } + /* Resume building code at end of the ctx->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1f294b8a49..7c7cc402a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -132,7 +132,7 @@ struct lp_build_if_state LLVMBuilderRef builder; struct lp_build_flow_context *flow; LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -142,9 +142,6 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMBuilderRef builder, LLVMValueRef condition); -void -lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var); - void lp_build_else(struct lp_build_if_state *ctx); -- cgit v1.2.3 From af31e65b5542147a53e4d3198eb8437f89457451 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:20:38 -0700 Subject: llvmpipe: free the phi array --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index a347cedf03..b7fa817e22 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -721,6 +721,8 @@ lp_build_endif(struct lp_build_if_state *ctx) } } + FREE(ifthen->phi); + /*** *** Now patch in the various branch instructions. ***/ -- cgit v1.2.3 From 855d7f51e4cfd6f4ce04bf34164676ba3bc2fc39 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:32:36 -0700 Subject: llvmpipe: move some fields to the private lp_build_flow_if struct --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 61 ++++++++++++++++-------------- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 2 - 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index b7fa817e22..161ec95d8c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -88,6 +88,9 @@ struct lp_build_flow_if unsigned num_variables; LLVMValueRef *phi; /**< array [num_variables] */ + + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -623,14 +626,14 @@ lp_build_if(struct lp_build_if_state *ctx, memset(ctx, 0, sizeof(*ctx)); ctx->builder = builder; ctx->flow = flow; - ctx->condition = condition; - ctx->entry_block = block; /* push/create new scope */ ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); ifthen->num_variables = flow->num_variables; + ifthen->condition = condition; + ifthen->entry_block = block; /* create a Phi node for each variable in this flow scope */ ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); @@ -640,8 +643,8 @@ lp_build_if(struct lp_build_if_state *ctx, } /* create endif/merge basic block for the phi functions */ - ctx->merge_block = lp_build_insert_new_block(builder, "endif-block"); - LLVMPositionBuilderAtEnd(builder, 
ctx->merge_block); + ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); /* create a phi node for each variable */ for (i = 0; i < flow->num_variables; i++) @@ -649,10 +652,10 @@ lp_build_if(struct lp_build_if_state *ctx, /* create/insert true_block before merge_block */ - ctx->true_block = LLVMInsertBasicBlock(ctx->merge_block, "if-true-block"); + ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(builder, ctx->true_block); + LLVMPositionBuilderAtEnd(builder, ifthen->true_block); } @@ -670,17 +673,17 @@ lp_build_else(struct lp_build_if_state *ctx) assert(ifthen); /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); } /* create/insert false_block before the merge block */ - ctx->false_block = LLVMInsertBasicBlock(ctx->merge_block, "if-false-block"); + ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); } @@ -697,27 +700,27 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - if (ctx->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + if (ifthen->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) pair */ for (i = 0; i < 
flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->false_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); } } else { /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { LLVMValueRef undef; assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); /* undef value from the block preceeding the 'if' */ undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(ifthen->phi[i], &undef, &ctx->entry_block, 1); + LLVMAddIncoming(ifthen->phi[i], &undef, &ifthen->entry_block, 1); } } @@ -728,28 +731,28 @@ lp_build_endif(struct lp_build_if_state *ctx) ***/ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); - if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, ctx->false_block); + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, ctx->merge_block); + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); } /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); - LLVMBuildBr(ctx->builder, ctx->merge_block); - if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { /* Append an unconditional Br(anch) instruction 
on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); - LLVMBuildBr(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); } - /* Resume building code at end of the ctx->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 7c7cc402a3..4c225a0d4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -131,8 +131,6 @@ struct lp_build_if_state { LLVMBuilderRef builder; struct lp_build_flow_context *flow; - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; -- cgit v1.2.3 From 5208af7853989c30bea6ce8c4ac659a2f2304225 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 12:47:30 -0700 Subject: llvmpipe: fix more if/else/endif design bugs --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 161ec95d8c..693742ff85 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -647,9 +647,12 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) + for (i = 0; i < flow->num_variables; i++) { ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + /* add the initial value of the var from the entry block */ + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + } /* create/insert true_block before 
merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -706,21 +709,20 @@ lp_build_endif(struct lp_build_if_state *ctx) for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; } } else { /* no else clause */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { - LLVMValueRef undef; - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - /* undef value from the block preceeding the 'if' */ - undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(ifthen->phi[i], &undef, &ifthen->entry_block, 1); + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; } } -- cgit v1.2.3 From f4321fbd961a0a891c7f40b16efc61aa791e03a9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 14:49:34 -0700 Subject: llvmpipe: optimize case when all four pixels are inside the triangle When the incoming c0,c1,c2 values are equal to INT_MIN it means that all pixels are inside the triangle. Thus we can skip the detailed pixel inside/outside triangle tests. Use the new lp_build_if()/endif() functions to generate the branching code. The code is disabled ATM however because it's actually a little slower than the original code. A little more tuning may fix that though... 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 106 +++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 3a669ba859..293535387a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -58,6 +58,7 @@ * @author Jose Fonseca */ +#include #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -212,14 +213,16 @@ generate_tri_edge_mask(LLVMBuilderRef builder, m2_vec = step2_ptr[i] > c2_vec mask = m0_vec & m1_vec & m2_vec */ + struct lp_build_flow_context *flow; + struct lp_build_if_state ifctx; struct lp_type i32_type; - LLVMTypeRef i32vec4_type; + LLVMTypeRef i32vec4_type, mask_type; - LLVMValueRef index; LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef step0_vec, step1_vec, step2_vec; - LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef m; + + LLVMValueRef int_min_vec; + LLVMValueRef not_draw_all; + LLVMValueRef in_out_mask; assert(i < 4); @@ -233,6 +236,12 @@ generate_tri_edge_mask(LLVMBuilderRef builder, i32vec4_type = lp_build_int32_vec4_type(); + mask_type = LLVMIntType(32 * 4); + + /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */ + int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN); + + /* c0_vec = {c0, c0, c0, c0} * Note that we emit this code four times but LLVM optimizes away * three instances of it. 
@@ -240,34 +249,66 @@ generate_tri_edge_mask(LLVMBuilderRef builder, c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); lp_build_name(c1_vec, "edgeconst1vec"); lp_build_name(c2_vec, "edgeconst2vec"); - index = LLVMConstInt(LLVMInt32Type(), i, 0); - step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); - step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); - step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + /* + * Use a conditional here to do detailed pixel in/out testing. + * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} + */ + flow = lp_build_flow_create(builder); + lp_build_flow_scope_begin(flow); - lp_build_name(step0_vec, "step0vec"); - lp_build_name(step1_vec, "step1vec"); - lp_build_name(step2_vec, "step2vec"); +#define OPTIMIZE_IN_OUT_TEST 0 +#if OPTIMIZE_IN_OUT_TEST + in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec); + lp_build_name(in_out_mask, "inoutmaskvec"); - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + not_draw_all = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, in_out_mask, mask_type, ""), + LLVMConstNull(mask_type), + ""); - m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); - m = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_flow_scope_declare(flow, &in_out_mask); - lp_build_name(m, "inoutmaskvec"); + lp_build_if(&ifctx, flow, builder, not_draw_all); +#endif + { + LLVMValueRef step0_vec, step1_vec, step2_vec; + LLVMValueRef m0_vec, m1_vec, m2_vec; + LLVMValueRef index, m; + + index = 
LLVMConstInt(LLVMInt32Type(), i, 0); + step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); + step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); + step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + + lp_build_name(step0_vec, "step0vec"); + lp_build_name(step1_vec, "step1vec"); + lp_build_name(step2_vec, "step2vec"); + + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + + m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); + in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_name(in_out_mask, "inoutmaskvec"); + + /* This is the initial alive/dead pixel mask. Additional bits will get cleared + * when the Z test fails, etc. + */ + } +#if OPTIMIZE_IN_OUT_TEST + lp_build_endif(&ifctx); +#endif - *mask = m; + lp_build_flow_scope_end(flow); + lp_build_flow_destroy(flow); - /* - * if mask = {0,0,0,0} skip quad - */ + *mask = in_out_mask; } @@ -432,6 +473,8 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); flow = lp_build_flow_create(builder); + + /* we'll use this mask context to skip blending if all pixels are dead */ lp_build_mask_begin(&mask_ctx, flow, type, mask); vec_type = lp_build_vec_type(type); @@ -737,24 +780,29 @@ generate_fragment(struct llvmpipe_context *lp, LLVMDisposeBuilder(builder); - /* - * Translate the LLVM IR into machine code. - */ + /* Verify the LLVM IR. 
If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - assert(0); + if (1) + LLVMDumpValue(variant->function); + abort(); } #endif - LLVMRunFunctionPassManager(screen->pass, variant->function); + /* Apply optimizations to LLVM IR */ + if (1) + LLVMRunFunctionPassManager(screen->pass, variant->function); if (LP_DEBUG & DEBUG_JIT) { + /* Print the LLVM IR to stderr */ LLVMDumpValue(variant->function); debug_printf("\n"); } + /* + * Translate the LLVM IR into machine code. + */ variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); if (LP_DEBUG & DEBUG_ASM) -- cgit v1.2.3 From c1a04416023e24621e4992caf593e8dfe8d7a2fc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 10 Jan 2010 17:22:09 +0000 Subject: llvmpipe: initial mrt support Non-mrt apps work, and the code looks correct, but not many mrt test apps handy atm... --- src/gallium/drivers/llvmpipe/lp_flush.c | 7 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 215 +++++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 +- src/gallium/drivers/llvmpipe/lp_setup.c | 18 +-- src/gallium/drivers/llvmpipe/lp_state.h | 6 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 114 +++++++++------ 7 files changed, 225 insertions(+), 143 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 9405150c4f..16fb00092e 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -77,8 +77,11 @@ llvmpipe_flush( struct pipe_context *pipe, if(flags & PIPE_FLUSH_FRAME) { static unsigned frame_no = 1; static char filename[256]; - util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[0]); + unsigned i; + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs) { + 
util_snprintf(filename, sizeof(filename), "cbuf%u_%u.bmp", i, frame_no); + debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[i]); + } util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf); ++frame_no; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 1a6e939aa2..3b316914b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -108,7 +108,7 @@ typedef void const void *a0, const void *dadx, const void *dady, - void *color, + uint8_t **color, void *depth, const int32_t c1, const int32_t c2, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6535e69308..38c27b90e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -53,6 +53,7 @@ lp_rast_begin( struct lp_rasterizer *rast, { struct pipe_screen *screen = rast->screen; struct pipe_surface *cbuf, *zsbuf; + int i; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -64,24 +65,27 @@ lp_rast_begin( struct lp_rasterizer *rast, rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || fb->height % TILE_SIZE != 0); - /* XXX support multiple color buffers here */ - cbuf = rast->state.fb.cbufs[0]; - if (cbuf) { - rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - fb->width, fb->height); - if (!rast->cbuf_transfer) - return FALSE; - - rast->cbuf_map = screen->transfer_map(rast->screen, - rast->cbuf_transfer); - if (!rast->cbuf_map) - return FALSE; + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + cbuf = rast->state.fb.cbufs[i]; + if (cbuf) { + rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + cbuf->width, + cbuf->height); + if 
(!rast->cbuf_transfer[i]) + goto fail; + + rast->cbuf_map[i] = screen->transfer_map(rast->screen, + rast->cbuf_transfer[i]); + if (!rast->cbuf_map[i]) + goto fail; + } } zsbuf = rast->state.fb.zsbuf; @@ -93,17 +97,23 @@ lp_rast_begin( struct lp_rasterizer *rast, zsbuf->zslice, PIPE_TRANSFER_READ_WRITE, 0, 0, - fb->width, fb->height); + zsbuf->width, + zsbuf->height); if (!rast->zsbuf_transfer) - return FALSE; + goto fail; rast->zsbuf_map = screen->transfer_map(rast->screen, rast->zsbuf_transfer); if (!rast->zsbuf_map) - return FALSE; + goto fail; } return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; } @@ -115,22 +125,26 @@ static void lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; + unsigned i; - if (rast->cbuf_map) - screen->transfer_unmap(screen, rast->cbuf_transfer); + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + if (rast->cbuf_map[i]) + screen->transfer_unmap(screen, rast->cbuf_transfer[i]); + + if (rast->cbuf_transfer[i]) + screen->tex_transfer_destroy(rast->cbuf_transfer[i]); + + rast->cbuf_transfer[i] = NULL; + rast->cbuf_map[i] = NULL; + } if (rast->zsbuf_map) screen->transfer_unmap(screen, rast->zsbuf_transfer); - if (rast->cbuf_transfer) - screen->tex_transfer_destroy(rast->cbuf_transfer); - if (rast->zsbuf_transfer) screen->tex_transfer_destroy(rast->zsbuf_transfer); - rast->cbuf_transfer = NULL; rast->zsbuf_transfer = NULL; - rast->cbuf_map = NULL; rast->zsbuf_map = NULL; } @@ -161,8 +175,9 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const uint8_t *clear_color = arg.clear_color; - uint8_t *color_tile = rast->tasks[thread_index].tile.color; - + uint8_t **color_tile = rast->tasks[thread_index].tile.color; + unsigned i; + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], clear_color[1], @@ -172,14 +187,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && 
clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { - memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + } } else { unsigned x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (y = 0; y < TILE_SIZE; y++) + for (x = 0; x < TILE_SIZE; x++) + for (chan = 0; chan < 4; ++chan) + TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan]; } } @@ -214,28 +232,40 @@ void lp_rast_load_color( struct lp_rasterizer *rast, struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const unsigned x = task->x; const unsigned y = task->y; - int w = TILE_SIZE; - int h = TILE_SIZE; + unsigned i; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; + if (x >= transfer->width) + continue; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(rast->cbuf_transfer->texture->format, - rast->tasks[thread_index].tile.color, - rast->cbuf_map, - rast->cbuf_transfer->stride, - x, y, - w, h); + if (y >= transfer->height) + continue; + /* XXX: require tile-size aligned render target dimensions: + */ + if (x + w > transfer->width) + w -= x + w - transfer->width; + + if (y + h > transfer->height) + h -= y + h - transfer->height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + lp_tile_read_4ub(transfer->texture->format, + rast->tasks[thread_index].tile.color[i], + 
rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + } } @@ -313,8 +343,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, { const struct lp_rast_state *state = rast->tasks[thread_index].current_state; struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; - void *color; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; + unsigned i; unsigned ix, iy; int block_offset; @@ -336,14 +367,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, block_offset = ((iy/4)*(16*16) + (ix/4)*16); /* color buffer */ - color = tile->color + 4 * block_offset; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ depth = tile->depth + block_offset; + + #ifdef DEBUG - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(tile->depth, 16)); + assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); @@ -360,8 +394,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, color, depth, c1, c2, c3, - inputs->step[0], inputs->step[1], inputs->step[2] - ); + inputs->step[0], inputs->step[1], inputs->step[2]); } @@ -377,29 +410,42 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, { const unsigned x = rast->tasks[thread_index].x; const unsigned y = rast->tasks[thread_index].y; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; + unsigned i; - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); + if (x >= transfer->width) + continue; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - thread_index, x, y, w, 
h); + if (y >= transfer->height) + continue; - lp_tile_write_4ub(rast->cbuf_transfer->texture->format, - rast->tasks[thread_index].tile.color, - rast->cbuf_map, - rast->cbuf_transfer->stride, - x, y, - w, h); + /* XXX: require tile-size aligned render target dimensions: + */ + if (x + w > transfer->width) + w -= x + w - transfer->width; + + if (y + h > transfer->height) + h -= y + h - transfer->height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); + + lp_tile_write_4ub(transfer->texture->format, + rast->tasks[thread_index].tile.color[i], + rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + } } @@ -600,7 +646,7 @@ lp_rasterize_scene( struct lp_rasterizer *rast, /* no threading */ lp_rast_begin( rast, fb, - fb->cbufs[0]!= NULL, + fb->nr_cbufs != 0, /* always write color if cbufs present */ fb->zsbuf != NULL && write_depth ); lp_scene_bin_iter_begin( scene ); @@ -667,7 +713,7 @@ thread_func( void *init_data ) write_depth = rast->curr_scene->write_depth; lp_rast_begin( rast, fb, - fb->cbufs[0] != NULL, + fb->nr_cbufs != 0, fb->zsbuf != NULL && write_depth ); } @@ -738,7 +784,7 @@ struct lp_rasterizer * lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) { struct lp_rasterizer *rast; - unsigned i; + unsigned i, cbuf; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) @@ -750,7 +796,9 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { - rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tasks[i].rast = rast; rast->tasks[i].thread_index = i; @@ -769,13 +817,14 @@ 
lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) */ void lp_rast_destroy( struct lp_rasterizer *rast ) { - unsigned i; + unsigned i, cbuf; util_unreference_framebuffer_state(&rast->state.fb); for (i = 0; i < Elements(rast->tasks); i++) { align_free(rast->tasks[i].tile.depth); - align_free(rast->tasks[i].tile.color); + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + align_free(rast->tasks[i].tile.color[cbuf]); } /* for synchronizing rasterization threads */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index cd72d7e69d..5afdeab049 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -46,7 +46,7 @@ struct lp_rasterizer; */ struct lp_rast_tile { - uint8_t *color; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; uint32_t *depth; }; @@ -87,9 +87,9 @@ struct lp_rasterizer /* Framebuffer stuff */ struct pipe_screen *screen; - struct pipe_transfer *cbuf_transfer; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; struct pipe_transfer *zsbuf_transfer; - void *cbuf_map; + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; void *zsbuf_map; struct { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 5cdcf4ecc9..74f3054864 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -155,26 +155,26 @@ begin_binning( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (setup->fb.cbufs[0]) { + if (setup->fb.nr_cbufs) { if (setup->clear.flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); + lp_rast_clear_color, + setup->clear.color ); else lp_scene_bin_everywhere( scene, - lp_rast_load_color, - lp_rast_arg_null() ); + lp_rast_load_color, + lp_rast_arg_null() ); } if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) lp_scene_bin_everywhere( scene, - 
lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_rast_clear_zstencil, + setup->clear.zstencil ); else lp_scene_bin_everywhere( scene, - lp_rast_load_zstencil, - lp_rast_arg_null() ); + lp_rast_load_zstencil, + lp_rast_arg_null() ); } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 25d1353674..cb240cb6e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -67,10 +67,16 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { enum pipe_format zsbuf_format; + unsigned nr_cbufs; + struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + struct { + ubyte colormask; + } cbuf_blend[PIPE_MAX_COLOR_BUFS]; + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 293535387a..01912d6ea2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -327,7 +327,7 @@ generate_fs(struct llvmpipe_context *lp, const struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, - LLVMValueRef *color, + LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, LLVMValueRef c0, LLVMValueRef c1, @@ -348,6 +348,7 @@ generate_fs(struct llvmpipe_context *lp, boolean early_depth_test; unsigned attrib; unsigned chan; + unsigned cbuf; assert(i < 4); @@ -364,9 +365,11 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_begin(flow); /* Declare the color and z variables */ - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[chan]); + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[cbuf][chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, 
&color[cbuf][chan]); + } } lp_build_flow_scope_declare(flow, &z); @@ -407,6 +410,7 @@ generate_fs(struct llvmpipe_context *lp, /* Alpha test */ /* XXX: should the alpha reference value be passed separately? */ + /* XXX: should only test the final assignment to alpha */ if(cbuf == 0 && chan == 3) { LLVMValueRef alpha = outputs[attrib][chan]; LLVMValueRef alpha_ref_value; @@ -416,9 +420,7 @@ generate_fs(struct llvmpipe_context *lp, &mask, alpha, alpha_ref_value); } - if(cbuf == 0) - color[chan] = outputs[attrib][chan]; - + color[cbuf][chan] = outputs[attrib][chan]; break; } @@ -539,7 +541,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; - LLVMValueRef color_ptr; + LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; @@ -549,12 +551,13 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned num_fs; unsigned i; unsigned chan; + unsigned cbuf; if (LP_DEBUG & DEBUG_JIT) { tgsi_dump(shader->base.tokens, 0); @@ -651,7 +654,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */ + arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ arg_types[9] = LLVMInt32Type(); /* c1 */ @@ -667,6 +670,10 @@ generate_fragment(struct 
llvmpipe_context *lp, variant->function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + /* XXX: need to propagate noalias down into color param now we are + * passing a pointer-to-pointer? + */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); @@ -677,7 +684,7 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); - color_ptr = LLVMGetParam(variant->function, 6); + color_ptr_ptr = LLVMGetParam(variant->function, 6); depth_ptr = LLVMGetParam(variant->function, 7); c0 = LLVMGetParam(variant->function, 8); c1 = LLVMGetParam(variant->function, 9); @@ -692,7 +699,7 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(color_ptr, "color"); + lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); @@ -721,8 +728,9 @@ generate_fragment(struct llvmpipe_context *lp, /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef out_color[NUM_CHANNELS]; + LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; + int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); @@ -742,40 +750,50 @@ generate_fragment(struct llvmpipe_context *lp, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); - for(chan = 0; chan < NUM_CHANNELS; ++chan) - fs_out_color[chan][i] = out_color[chan]; + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) + for(chan = 0; chan < NUM_CHANNELS; ++chan) + fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); - /* - * Convert the fs's output 
color and mask to fit to the blending type. + /* Loop over color outputs / color buffers to do blending. */ + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + LLVMValueRef color_ptr; + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - lp_build_conv(builder, fs_type, blend_type, - fs_out_color[chan], num_fs, - &blend_in_color[chan], 1); - lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); + /* + * Convert the fs's output color and mask to fit to the blending type. + */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + lp_build_conv(builder, fs_type, blend_type, + fs_out_color[cbuf][chan], num_fs, + &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); + } + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + + color_ptr = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), + ""); + lp_build_name(color_ptr, "color_ptr%d", cbuf); + + /* + * Blending. + */ + generate_blend(&key->blend, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); - - /* - * Blending. 
- */ - - generate_blend(&key->blend, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); - LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -940,21 +958,27 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - if(lp->framebuffer.cbufs[0]) { + if (lp->framebuffer.nr_cbufs) { + memcpy(&key->blend, lp->blend, sizeof key->blend); + } + + key->nr_cbufs = lp->framebuffer.nr_cbufs; + for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { const struct util_format_description *format_desc; unsigned chan; - memcpy(&key->blend, lp->blend, sizeof key->blend); - - format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); - /* mask out color channels not present in the color buffer */ + /* mask out color channels not present in the color buffer. + * Should be simple to incorporate per-cbuf writemasks: + */ for(chan = 0; chan < 4; ++chan) { enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - if(swizzle > 4) - key->blend.colormask &= ~(1 << chan); + + if(swizzle <= UTIL_FORMAT_SWIZZLE_W) + key->cbuf_blend[i].colormask |= (1 << chan); } } -- cgit v1.2.3 From 16c1ad54bc5f3dc47a8a17cbc3724d913f5da005 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 18:41:42 +0000 Subject: llvmpipe: remove scissor cliprect derived state Was previously calculating the intersection of the scissor rectangle and the framebuffer dimensions. Rendering is already restricted to framebuffer dimensions by other means, so scissor testing (when implemented) can just use the scissor state directly. 
--- src/gallium/drivers/llvmpipe/lp_context.h | 3 -- src/gallium/drivers/llvmpipe/lp_state_derived.c | 38 ------------------------- 2 files changed, 41 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 1ede6a6a72..5390a71369 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -96,9 +96,6 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /** Derived from scissor and surface bounds: */ - struct pipe_scissor_state cliprect; - /** The tiling engine */ struct setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 78d046985b..af02fcfec9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -136,39 +136,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) -/** - * Recompute cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct llvmpipe_context *lp) -{ - /* LP_NEW_FRAMEBUFFER - */ - uint surfWidth = lp->framebuffer.width; - uint surfHeight = lp->framebuffer.height; - - /* LP_NEW_RASTERIZER - */ - if (lp->rasterizer->scissor) { - - /* LP_NEW_SCISSOR - * - * clip to scissor rect: - */ - lp->cliprect.minx = MAX2(lp->scissor.minx, 0); - lp->cliprect.miny = MAX2(lp->scissor.miny, 0); - lp->cliprect.maxx = MIN2(lp->scissor.maxx, surfWidth); - lp->cliprect.maxy = MIN2(lp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - lp->cliprect.minx = 0; - lp->cliprect.miny = 0; - lp->cliprect.maxx = surfWidth; - lp->cliprect.maxy = surfHeight; - } -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
@@ -189,11 +156,6 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_VS)) compute_vertex_info( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_SCISSOR | - LP_NEW_RASTERIZER | - LP_NEW_FRAMEBUFFER)) - compute_cliprect(llvmpipe); - if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | LP_NEW_DEPTH_STENCIL_ALPHA | -- cgit v1.2.3 From 094525fb23127e7ca253d732207bbbbe00488a6b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 8 Jan 2010 13:55:32 +0000 Subject: llvmpipe: remove opencoded constant --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index fe34903cf3..5197dca8f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -287,10 +287,10 @@ do_triangle_ccw(struct setup_context *setup, } /* Bounding rectangle (in pixels) */ - tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; - tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; - tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; - tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + tri->minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; if (tri->miny == tri->maxy || tri->minx == tri->maxx) { -- cgit v1.2.3 From ad74ea286951634d49d500f2e5ce740072794fe2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Jan 2010 12:02:53 +0000 Subject: st/mesa: early exit on error path Can't rely on asserts having any effect on flowcontrol for release builds. 
--- src/mesa/state_tracker/st_cb_condrender.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 780b40c206..e2cd80b404 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -69,6 +69,7 @@ st_BeginConditionalRender(GLcontext *ctx, struct gl_query_object *q, break; default: assert(0 && "bad mode in st_BeginConditionalRender"); + return; } pipe->render_condition(pipe, stq->pq, m); -- cgit v1.2.3 From 86f450060debebd66dd5fb72f83800d7634efeaa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Jan 2010 12:06:51 +0000 Subject: llvmpipe: force constant interpolation of flatshade colors Nice speedup for gears. --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 11 ++++++++++- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 1 + src/gallium/drivers/llvmpipe/lp_state.h | 6 +++--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 ++++++- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index daedf40d55..a6acaead88 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -316,6 +316,7 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, @@ -358,7 +359,15 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, for( attrib = first; attrib <= last; ++attrib ) { bld->mask[1 + attrib] = mask; - bld->mode[1 + attrib] = decl->Declaration.Interpolate; + + /* XXX: have mesa set INTERP_CONSTANT in the fragment + * shader. 
+ */ + if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + flatshade) + bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; + else + bld->mode[1 + attrib] = decl->Declaration.Interpolate; } bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index e2b3bc1bf0..ca958cdf34 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -79,6 +79,7 @@ struct lp_build_interp_soa_context void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index cb240cb6e5..4c6747bb2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -66,12 +66,12 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { - enum pipe_format zsbuf_format; - unsigned nr_cbufs; - struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + enum pipe_format zsbuf_format; + unsigned nr_cbufs:8; + unsigned flatshade:1; struct { ubyte colormask; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index af02fcfec9..632cafa2e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -159,6 +159,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_RASTERIZER | LP_NEW_SAMPLER | LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 01912d6ea2..7ce7202777 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -718,7 +718,10 @@ generate_fragment(struct llvmpipe_context *lp, generate_pos0(builder, x, y, &x0, &y0); - lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, + lp_build_interp_soa_init(&interp, + shader->base.tokens, + key->flatshade, + builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0); @@ -958,6 +961,8 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ + key->flatshade = lp->rasterizer->flatshade; + if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); } -- cgit v1.2.3 From 46b5bd6cadd13f47c10aafe9194c90234db91a2a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 12:59:36 -0700 Subject: llvmpipe: do the all-in test on the scalar c0 instead of vector c0 This still isn't faster, but committing it for posterity. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 45 ++++++++++++++---------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7ce7202777..6816db4387 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -217,10 +217,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, struct lp_build_if_state ifctx; struct lp_type i32_type; LLVMTypeRef i32vec4_type, mask_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - - LLVMValueRef int_min_vec; LLVMValueRef not_draw_all; LLVMValueRef in_out_mask; @@ -238,21 +235,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, mask_type = LLVMIntType(32 * 4); - /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */ - int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN); - - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. 
- */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - /* * Use a conditional here to do detailed pixel in/out testing. * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} @@ -260,17 +242,19 @@ generate_tri_edge_mask(LLVMBuilderRef builder, flow = lp_build_flow_create(builder); lp_build_flow_scope_begin(flow); -#define OPTIMIZE_IN_OUT_TEST 0 + { +#define OPTIMIZE_IN_OUT_TEST 1 #if OPTIMIZE_IN_OUT_TEST - in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec); - lp_build_name(in_out_mask, "inoutmaskvec"); not_draw_all = LLVMBuildICmp(builder, - LLVMIntEQ, - LLVMBuildBitCast(builder, in_out_mask, mask_type, ""), - LLVMConstNull(mask_type), + LLVMIntNE, + c0, + LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), ""); + in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + + lp_build_flow_scope_declare(flow, &in_out_mask); lp_build_if(&ifctx, flow, builder, not_draw_all); @@ -280,6 +264,18 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef m0_vec, m1_vec, m2_vec; LLVMValueRef index, m; + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. 
+ */ + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + + index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); @@ -305,6 +301,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_endif(&ifctx); #endif + } lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); -- cgit v1.2.3 From 3b5d84926847cf2008da4e2dc146090d0c1b5402 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 13:16:00 -0700 Subject: llvmpipe: refactor generate_fragment() code This will make it easier to generate multiple versions of the fragment code per variant. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 138 ++++++++++++++++------------- 1 file changed, 76 insertions(+), 62 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 6816db4387..c4ca0f1d5e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -515,13 +515,13 @@ generate_blend(const struct pipe_blend_state *blend, * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. 
*/ -static struct lp_fragment_shader_variant * +static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - const struct lp_fragment_shader_variant_key *key) + struct lp_fragment_shader_variant *variant) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - struct lp_fragment_shader_variant *variant; + const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; @@ -556,64 +556,6 @@ generate_fragment(struct llvmpipe_context *lp, unsigned chan; unsigned cbuf; - if (LP_DEBUG & DEBUG_JIT) { - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - 
debug_printf(" .format = %s\n", - pf_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - debug_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); - } - } - } - - variant = CALLOC_STRUCT(lp_fragment_shader_variant); - if(!variant) - return NULL; - - variant->shader = shader; - memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. 
*/ @@ -828,6 +770,78 @@ generate_fragment(struct llvmpipe_context *lp, variant->next = shader->variants; shader->variants = variant; +} + + +static struct lp_fragment_shader_variant * +generate_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key) +{ + struct lp_fragment_shader_variant *variant; + + if (LP_DEBUG & DEBUG_JIT) { + unsigned i; + + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.blend_enable) { + debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + 
key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + } + + variant = CALLOC_STRUCT(lp_fragment_shader_variant); + if(!variant) + return NULL; + + variant->shader = shader; + memcpy(&variant->key, key, sizeof *key); + + generate_fragment(lp, shader, variant); return variant; } @@ -1008,7 +1022,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } if(!variant) - variant = generate_fragment(lp, shader, &key); + variant = generate_variant(lp, shader, &key); shader->current = variant; -- cgit v1.2.3 From 9a10d14a441ca76c5c9ea8986a2eb4b5923a9b9f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 15:30:17 -0700 Subject: llvmpipe: move, update comments --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c4ca0f1d5e..8939e7e622 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -204,15 +204,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef step1_ptr, /* ivec4 */ 
LLVMValueRef step2_ptr) /* ivec4 */ { - /* - c0_vec = splat(c0) - c1_vec = splat(c1) - c2_vec = splat(c2) - m0_vec = step0_ptr[i] > c0_vec - m1_vec = step1_ptr[i] > c1_vec - m2_vec = step2_ptr[i] > c2_vec - mask = m0_vec & m1_vec & m2_vec - */ struct lp_build_flow_context *flow; struct lp_build_if_state ifctx; struct lp_type i32_type; @@ -237,7 +228,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, /* * Use a conditional here to do detailed pixel in/out testing. - * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} + * We only have to do this if c0 != INT_MIN. */ flow = lp_build_flow_create(builder); lp_build_flow_scope_begin(flow); @@ -245,7 +236,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, { #define OPTIMIZE_IN_OUT_TEST 1 #if OPTIMIZE_IN_OUT_TEST - + /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, LLVMIntNE, c0, @@ -257,6 +248,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_declare(flow, &in_out_mask); + /* if (not_draw_all) {... 
*/ lp_build_if(&ifctx, flow, builder, not_draw_all); #endif { @@ -275,27 +267,24 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_name(c1_vec, "edgeconst1vec"); lp_build_name(c2_vec, "edgeconst2vec"); - + /* load step0vec, step1, step2 vec from memory */ index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - lp_build_name(step0_vec, "step0vec"); lp_build_name(step1_vec, "step1vec"); lp_build_name(step2_vec, "step2vec"); + /* m0_vec = step0_ptr[i] > c0_vec */ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + /* in_out_mask = m0_vec & m1_vec & m2_vec */ m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); lp_build_name(in_out_mask, "inoutmaskvec"); - - /* This is the initial alive/dead pixel mask. Additional bits will get cleared - * when the Z test fails, etc. - */ } #if OPTIMIZE_IN_OUT_TEST lp_build_endif(&ifctx); @@ -305,6 +294,10 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); + /* This is the initial alive/dead pixel mask for a quad of four pixels. + * It's an int[4] vector with each word set to 0 or ~0. + * Words will get cleared when pixels fail the Z test, etc. + */ *mask = in_out_mask; } -- cgit v1.2.3 From 5cf46309698dc53ee8c383b9510ee21a3d74015e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 15:30:54 -0700 Subject: llvmpipe: disable the all in/out test code for now It's still faster not to try to special case the "all pixels are known to be inside the triangle" case.
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 8939e7e622..2db8ab2566 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -234,7 +234,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_begin(flow); { -#define OPTIMIZE_IN_OUT_TEST 1 +#define OPTIMIZE_IN_OUT_TEST 0 #if OPTIMIZE_IN_OUT_TEST /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, -- cgit v1.2.3 From a32e9b2a2d62d64b9c22c4c7af9e0f009189b0a2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 10:11:36 -0700 Subject: llvmpipe: remove unused #define --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 693742ff85..bc83138908 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -41,9 +41,6 @@ #define LP_BUILD_FLOW_MAX_VARIABLES 32 #define LP_BUILD_FLOW_MAX_DEPTH 32 -#define LP_BUILD_IF_MAX_VARIABLES 8 - - /** * Enumeration of all possible flow constructs. 
*/ -- cgit v1.2.3 From 4061ca02dd837950201a9ada462f944ae25deeb5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 13:01:32 -0700 Subject: llvmpipe: silence unused var warnings --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2db8ab2566..c6f5801876 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -204,12 +204,15 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef step1_ptr, /* ivec4 */ LLVMValueRef step2_ptr) /* ivec4 */ { - struct lp_build_flow_context *flow; +#define OPTIMIZE_IN_OUT_TEST 0 +#if OPTIMIZE_IN_OUT_TEST struct lp_build_if_state ifctx; + LLVMValueRef not_draw_all; +#endif + struct lp_build_flow_context *flow; struct lp_type i32_type; LLVMTypeRef i32vec4_type, mask_type; LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef not_draw_all; LLVMValueRef in_out_mask; assert(i < 4); @@ -234,7 +237,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_begin(flow); { -#define OPTIMIZE_IN_OUT_TEST 0 #if OPTIMIZE_IN_OUT_TEST /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, -- cgit v1.2.3 From de10168a462f57ead41800ea135476bb5ae8c678 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:06:19 -0700 Subject: llvmpipe: added lp_scene_is_empty() --- src/gallium/drivers/llvmpipe/lp_scene.c | 22 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 774a1fecd7..70d5847d8e 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -66,6 +66,28 @@ lp_scene_init(struct lp_scene *scene) } +/** + * Check if the scene's bins are all empty. + * For debugging purposes. 
+ */ +boolean +lp_scene_is_empty(struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < TILES_Y; y++) { + for (x = 0; x < TILES_X; x++) { + const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + const struct cmd_block_list *list = &bin->commands; + if (list->head != list->tail || list->head->count > 0) { + return FALSE; + } + } + } + return TRUE; +} + + /** * Set scene to empty state. */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 796fc516cc..7255727785 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -133,6 +133,8 @@ void lp_scene_destroy(struct lp_scene *scene); void lp_scene_init(struct lp_scene *scene); +boolean lp_scene_is_empty(struct lp_scene *scene ); + void lp_scene_reset(struct lp_scene *scene ); void lp_scene_free_bin_data(struct lp_scene *scene); -- cgit v1.2.3 From 214ffad01598c8780417b9fa9df75e951c8ac049 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:08:07 -0700 Subject: llvmpipe: clamp maxx,maxy to framebuffer size (in terms of tiles) In some corner cases the right-most / bottom-most vertex can be right on the edge of the framebuffer. Because the maxx, maxy vals are computed with a series of float/int, pixel/tile transformations we can end up with maxx >= scene->x_tiles or maxy >= scene->y_tiles. This leads to putting data into bins that never get processed, or reset. This becomes stale data that can lead to segfaults. Clamping fixes this. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5197dca8f9..9248125de8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -389,6 +389,11 @@ do_triangle_ccw(struct setup_context *setup, maxx = tri->maxx / TILE_SIZE; maxy = tri->maxy / TILE_SIZE; + /* Clamp maxx, maxy to framebuffer size + */ + maxx = MIN2(maxx, scene->tiles_x - 1); + maxy = MIN2(maxy, scene->tiles_y - 1); + /* Determine which tile(s) intersect the triangle's bounding box */ if (miny == maxy && minx == maxx) -- cgit v1.2.3 From c560b97b17a009f5ea8423523cd3a70fe7b506e9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:11:40 -0700 Subject: llvmpipe: assert that we're putting data into a valid bin --- src/gallium/drivers/llvmpipe/lp_scene.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 7255727785..b59b687002 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -227,6 +227,9 @@ lp_scene_bin_command( struct lp_scene *scene, struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); struct cmd_block_list *list = &bin->commands; + assert(x < scene->tiles_x); + assert(y < scene->tiles_y); + if (list->tail->count == CMD_BLOCK_MAX) { lp_bin_new_cmd_block( list ); } -- cgit v1.2.3 From ec9cfac7682268a2d81d6edaf81f8fbb770e17ed Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:12:07 -0700 Subject: llvmpipe: debug checks: make sure scene is empty at key points --- src/gallium/drivers/llvmpipe/lp_scene.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 70d5847d8e..7ec3206b81 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ 
b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -116,6 +116,8 @@ lp_scene_reset(struct lp_scene *scene ) } } + assert(lp_scene_is_empty(scene)); + /* Free all but last binned data block: */ { @@ -163,6 +165,8 @@ void lp_scene_set_framebuffer_size( struct lp_scene *scene, unsigned width, unsigned height ) { + assert(lp_scene_is_empty(scene)); + scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; } -- cgit v1.2.3 From 7e4c75c040bfd93fafb3a3ebbda25db8bd948e18 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:12:49 -0700 Subject: llvmpipe: fix indentation, comment typo --- src/gallium/drivers/llvmpipe/lp_scene.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 7ec3206b81..45d5446267 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -163,7 +163,7 @@ lp_scene_free_bin_data(struct lp_scene *scene) void lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ) + unsigned width, unsigned height ) { assert(lp_scene_is_empty(scene)); @@ -259,8 +259,8 @@ lp_replace_last_command_arg( struct cmd_bin *bin, */ void lp_scene_bin_state_command( struct lp_scene *scene, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) { unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { @@ -303,7 +303,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ) /** - * Return point to next bin to be rendered. + * Return pointer to next bin to be rendered. * The lp_scene::curr_x and ::curr_y fields will be advanced. * Multiple rendering threads will call this function to get a chunk * of work (a bin) to work on. 
-- cgit v1.2.3 From da45f49cc63fff06513dc28d9616084fc81798d4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 14:41:02 +0000 Subject: llvmpipe: quick hack to short-circuit empty bins --- src/gallium/drivers/llvmpipe/lp_rast.c | 23 ++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 38c27b90e3..4c13d4d80b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -576,6 +576,26 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } +static boolean +is_empty_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i; + + if (head->next != NULL || + head->count > PIPE_MAX_COLOR_BUFS + 1) + return FALSE; + + for (i = 0; i < head->count; i++) + if (head->cmd[i] != lp_rast_load_color && + head->cmd[i] != lp_rast_load_zstencil) + return FALSE; + + return TRUE; +} + + /** * Rasterize/execute all bins within a scene. @@ -606,7 +626,8 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + if (!is_empty_bin( rast, bin )) + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 74f3054864..38ea0c663f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -153,7 +153,9 @@ begin_binning( struct setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__, + (setup->clear.flags & PIPE_CLEAR_COLOR) ? 
"clear": "load", + (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) ? "clear": "load"); if (setup->fb.nr_cbufs) { if (setup->clear.flags & PIPE_CLEAR_COLOR) -- cgit v1.2.3 From f4b29e6ad38939318ce233ad28c70a608e7db0bd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 15:49:24 +0000 Subject: llvmpipe: improve empty-bin test We emit at most two clear packets (color and z respectively). --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4c13d4d80b..9606418a37 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -584,7 +584,7 @@ is_empty_bin( struct lp_rasterizer *rast, int i; if (head->next != NULL || - head->count > PIPE_MAX_COLOR_BUFS + 1) + head->count > 2) return FALSE; for (i = 0; i < head->count; i++) -- cgit v1.2.3 From db83ad4b4353ea6f9c755f18bf1455ea78b5bf12 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 16:29:39 +0000 Subject: llvmpipe: improve empty-bin test further Remove unused param, add comments. Thanks to Brian for review. --- src/gallium/drivers/llvmpipe/lp_rast.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9606418a37..0a8d730580 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -576,13 +576,23 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } +/* An empty bin is one that just loads the contents of the tile and + * stores them again unchanged. This typically happens when bins have + * been flushed for some reason in the middle of a frame, or when + * incremental updates are being made to a render target. + * + * Try to avoid doing pointless work in this case. 
+ */ static boolean -is_empty_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin ) +is_empty_bin( const struct cmd_bin *bin ) { const struct cmd_block *head = bin->commands.head; int i; + /* We emit at most two load-tile commands at the start of the first + * command block. If there are more than two commands in the + * block, we know that the bin is non-empty. + */ if (head->next != NULL || head->count > 2) return FALSE; @@ -626,7 +636,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { - if (!is_empty_bin( rast, bin )) + if (!is_empty_bin( bin )) rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } -- cgit v1.2.3 From 212f3a6cb3cb49127a6e5588553fe2d327f46563 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:29:43 -0700 Subject: llvmpipe: tweak subpixel_snap() arithmetic This adjustment fixes some rasterization differences between llvmpipe and softpipe (and other renderers). 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 9248125de8..575265b0f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -237,7 +237,7 @@ static void setup_tri_coefficients( struct setup_context *setup, static inline int subpixel_snap( float a ) { - return util_iround(FIXED_ONE * a); + return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); } -- cgit v1.2.3 From 4439aab7b73c235b64df60f9f62fda3492dbfdc5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:31:36 -0700 Subject: llvmpipe: comments and LLVMValueRef naming --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 2df86dd32e..54b31befe6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -874,6 +874,9 @@ lp_build_iround(struct lp_build_context *bld, } +/** + * Convert float[] to int[] with floor(). + */ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) @@ -900,6 +903,7 @@ lp_build_ifloor(struct lp_build_context *bld, sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); @@ -908,11 +912,14 @@ lp_build_ifloor(struct lp_build_context *bld, /* offset = a < 0 ? 
-0.99999(9)f : 0.0f */ offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + lp_build_name(offset, "floor.offset"); res = LLVMBuildAdd(bld->builder, a, offset, ""); + lp_build_name(res, "floor.res"); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + lp_build_name(res, "floor"); return res; } -- cgit v1.2.3 From 0bb5c3060f8784d6d6828b1455e736cd8f6416cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:32:10 -0700 Subject: llvmpipe: added debug warning --- src/gallium/drivers/llvmpipe/lp_texture.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 2462378152..a953e8845a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -177,6 +177,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, return &lpt->base; #else + debug_printf("llvmpipe_texture_blanket() not implemented!"); return NULL; #endif } -- cgit v1.2.3 From 95ee14f147e713bd132dc56a1151232957752c90 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 16:52:17 +0000 Subject: llvmpipe: implement lp_rast_load_zstencil Load zbuffer contents for binned scenes that don't start with a clear and which have a bound zbuffer. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 36 ++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0a8d730580..7753f9bb3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -269,6 +269,23 @@ void lp_rast_load_color( struct lp_rasterizer *rast, } +static void +lp_tile_read_z32(uint32_t *tile, + const uint8_t *map, + unsigned map_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + const uint8_t *map_row = map + y0*map_stride; + for (y = 0; y < h; ++y) { + const uint32_t *map_pixel = (uint32_t *)(map_row + x0*4); + for (x = 0; x < w; ++x) { + *tile++ = *map_pixel++; + } + map_row += map_stride; + } +} + /** * Load tile z/stencil from the framebuffer surface. * This is a bin command called during bin processing. @@ -277,9 +294,24 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - /* call u_tile func to load depth (and stencil?) 
from surface */ + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + + assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_read_z32(rast->tasks[thread_index].tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); } -- cgit v1.2.3 From 4231006e29cbf9fb54c72acf35009f3b18fe62ab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 20:14:04 +0000 Subject: llvmpipe: add bin debugger Adjust definition of empty_bin according to what's actually in empty bins. We often have a state packet before/after load commands. Still need to do something about the fence packets. --- src/gallium/drivers/llvmpipe/lp_rast.c | 57 +++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7753f9bb3f..6c7ece9fdb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -608,6 +608,44 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } + +#define RAST(x) { lp_rast_##x, #x } + +static struct { + lp_rast_cmd cmd; + const char *name; +} cmd_names[] = +{ + RAST(load_color), + RAST(load_zstencil), + RAST(clear_color), + RAST(clear_zstencil), + RAST(triangle), + RAST(shade_tile), + RAST(set_state), + RAST(fence), +}; + +static void +debug_bin( const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i, j; + + for (i = 0; i < head->count; i++) { + debug_printf("%d: ", i); + for (j = 0; j < Elements(cmd_names); j++) { + if (head->cmd[i] == cmd_names[j].cmd) { + debug_printf("%s\n", cmd_names[j].name); + break; + } + } + if (j == Elements(cmd_names)) + debug_printf("...other\n"); + } + +} + /* An empty bin is one that just loads the contents of the tile and * stores them again unchanged. 
This typically happens when bins have * been flushed for some reason in the middle of a frame, or when @@ -620,19 +658,28 @@ is_empty_bin( const struct cmd_bin *bin ) { const struct cmd_block *head = bin->commands.head; int i; - + + if (0) + debug_bin(bin); + /* We emit at most two load-tile commands at the start of the first - * command block. If there are more than two commands in the - * block, we know that the bin is non-empty. + * command block. In addition we seem to emit a couple of + * set-state commands even in empty bins. + * + * As a heuristic, if a bin has more than 4 commands, consider it + * non-empty. */ if (head->next != NULL || - head->count > 2) + head->count > 4) { return FALSE; + } for (i = 0; i < head->count; i++) if (head->cmd[i] != lp_rast_load_color && - head->cmd[i] != lp_rast_load_zstencil) + head->cmd[i] != lp_rast_load_zstencil && + head->cmd[i] != lp_rast_set_state) { return FALSE; + } return TRUE; } -- cgit v1.2.3 From a1acbff299c444913418e65da473745cd901a2db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 13 Jan 2010 21:51:47 +0000 Subject: llvmpipe: Reset the bin when shading a whole tile with an opaque shader. 
--- src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ src/gallium/drivers/llvmpipe/lp_scene.c | 33 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_scene.h | 4 ++++ src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- src/gallium/drivers/llvmpipe/lp_setup.h | 3 ++- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 ++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++++++++++- 7 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 46e22f69a6..d926adb6b2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -68,6 +68,8 @@ struct lp_rast_state { * the tile color/z/stencil data somehow: */ lp_jit_frag_func jit_function; + + boolean opaque; }; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 45d5446267..967d666bb4 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -88,6 +88,25 @@ lp_scene_is_empty(struct lp_scene *scene ) } +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; +} + + /** * Set scene to empty state. 
*/ @@ -100,19 +119,7 @@ lp_scene_reset(struct lp_scene *scene ) */ for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; + lp_scene_bin_reset(scene, i, j); } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index b59b687002..4b6527d67c 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -215,6 +215,10 @@ lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) } +/** Remove all commands from a bin */ +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y); + /* Add a command to bin[x][y]. */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 38ea0c663f..61b968c49f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -364,12 +364,14 @@ lp_setup_set_fs_inputs( struct setup_context *setup, void lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function ) + lp_jit_frag_func jit_function, + boolean opaque ) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); /* FIXME: reference count */ setup->fs.current.jit_function = jit_function; + setup->fs.current.opaque = opaque; setup->dirty |= LP_SETUP_NEW_FS; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bf12cb8527..bac7d73e8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -97,7 +97,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, void lp_setup_set_fs_function( struct 
setup_context *setup, - lp_jit_frag_func jit_function ); + lp_jit_frag_func jit_function, + boolean opaque ); void lp_setup_set_fs_constants(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 575265b0f5..0f5b25b725 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -459,6 +459,12 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ + if(setup->fs.current.opaque) { + lp_scene_bin_reset( scene, x, y ); + lp_scene_bin_command( scene, x, y, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); + } lp_scene_bin_command( scene, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c6f5801876..1ed9a2f5bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1005,6 +1005,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; + boolean opaque; make_variant_key(lp, shader, &key); @@ -1021,6 +1022,15 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; + /* TODO: put this in the variant */ + opaque = !key.blend.logicop_enable && + !key.blend.blend_enable && + !key.alpha.enabled && + !key.depth.enabled && + !shader->info.uses_kill + ? TRUE : FALSE; + lp_setup_set_fs_function(lp->setup, - shader->current->jit_function); + shader->current->jit_function, + opaque); } -- cgit v1.2.3 From 7df4c88088ecf34764c558b4f7fe7ef6c82327bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 13 Jan 2010 22:07:24 +0000 Subject: llvmpipe: Opaque shader implies complete colormask too. 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1ed9a2f5bf..ea5868dee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1023,8 +1023,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; /* TODO: put this in the variant */ + /* TODO: most of these can be relaxed, in particular the colormask */ opaque = !key.blend.logicop_enable && !key.blend.blend_enable && + key.blend.colormask == 0xf && !key.alpha.enabled && !key.depth.enabled && !shader->info.uses_kill -- cgit v1.2.3 From ddb94661a43fe50a0a058a56b05c65ee0cc204d9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:22:02 -0700 Subject: llvmpipe: minor comment upgrades --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a1808fcd4c..66654ec5e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -75,7 +75,7 @@ struct setup_context */ struct draw_stage *vbuf; struct lp_rasterizer *rast; - struct lp_scene *scene; /**< current scene */ + struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ boolean flatshade_first; @@ -116,7 +116,7 @@ struct setup_context uint8_t *stored; } blend_color; - unsigned dirty; /**< bitmask of LP_SETUP_x bits */ + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct setup_context *, const float (*v0)[4]); -- cgit v1.2.3 From 9be1feacf2dbd36fa9fb65b9932a74f04a7d9cca Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:57:44 -0700 Subject: llvmpipe: added llvmpipe_texture_const() --- 
src/gallium/drivers/llvmpipe/lp_texture.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 00a20763e4..e37ef6059a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -73,6 +73,13 @@ llvmpipe_texture(struct pipe_texture *pt) return (struct llvmpipe_texture *) pt; } +static INLINE const struct llvmpipe_texture * +llvmpipe_texture_const(const struct pipe_texture *pt) +{ + return (const struct llvmpipe_texture *) pt; +} + + static INLINE struct llvmpipe_transfer * llvmpipe_transfer(struct pipe_transfer *pt) { -- cgit v1.2.3 From a36395d4875f5c416f1b1a6d2ed7f933e40c379c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:58:13 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 632cafa2e6..2c349fdb1d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -135,9 +135,11 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) } - - -/* Hopefully this will remain quite simple, otherwise need to pull in +/** + * Handle state changes. + * Called just prior to drawing anything (pipe::draw_arrays(), etc). + * + * Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) -- cgit v1.2.3 From 4769328fe1ddaa1882dddbaad21239d5fdcddf19 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:58:36 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ea5868dee1..26a2d6cc23 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -999,6 +999,10 @@ make_variant_key(struct llvmpipe_context *lp, } +/** + * Update fragment state. This is called just prior to drawing + * something when some fragment-related state has changed. + */ void llvmpipe_update_fs(struct llvmpipe_context *lp) { -- cgit v1.2.3 From 592e40aa7bdbda5a09becb898300393d599c033a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 13:43:58 -0700 Subject: llvmpipe: added scene functions for texture reference counting When a texture is used in the scene we add it to a list of texture references. The lp_scene_is_textured_referenced() function tells us if a texture is referenced by the scene. 
--- src/gallium/drivers/llvmpipe/lp_scene.c | 48 +++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 16 +++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 967d666bb4..191122de7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -27,6 +27,7 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_scene.h" @@ -62,6 +63,8 @@ lp_scene_init(struct lp_scene *scene) scene->data.head = scene->data.tail = CALLOC_STRUCT(data_block); + make_empty_list(&scene->textures); + pipe_mutex_init(scene->mutex); } @@ -140,6 +143,18 @@ lp_scene_reset(struct lp_scene *scene ) list->head = list->tail; list->head->used = 0; } + + /* Release texture refs + */ + { + struct texture_ref *ref, *next, *ref_list = &scene->textures; + for (ref = ref_list->next; ref != ref_list; ref = next) { + next = next_elem(ref); + pipe_texture_reference(&ref->texture, NULL); + FREE(ref); + } + make_empty_list(ref_list); + } } @@ -229,6 +244,39 @@ lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) } +/** + * Add a reference to a texture by the scene. + */ +void +lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ) +{ + struct texture_ref *ref = CALLOC_STRUCT(texture_ref); + if (ref) { + struct texture_ref *ref_list = &scene->textures; + pipe_texture_reference(&ref->texture, texture); + insert_at_tail(ref_list, ref); + } +} + + +/** + * Does this scene have a reference to the given texture? 
+ */ +boolean +lp_scene_is_textured_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ) +{ + const struct texture_ref *ref_list = &scene->textures; + const struct texture_ref *ref; + foreach (ref, ref_list) { + if (ref->texture == texture) + return TRUE; + } + return FALSE; +} + + /** * Return last command in the bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 4b6527d67c..86facf8eac 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -97,6 +97,13 @@ struct data_block_list { }; +/** List of texture references */ +struct texture_ref { + struct pipe_texture *texture; + struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +}; + + /** * All bins and bin data are contained here. * Per-bin data goes into the 'tile' bins. @@ -112,6 +119,9 @@ struct lp_scene { /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; + /** list of textures referenced by the scene commands */ + struct texture_ref textures; + boolean write_depth; /** @@ -150,6 +160,12 @@ unsigned lp_scene_data_size( const struct lp_scene *scene ); unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); +void lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ); + +boolean lp_scene_is_textured_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ); + /** * Allocate space for a command/data in the bin's data buffer. 
-- cgit v1.2.3 From a27b12171d84c6e731af08f48a657c377f8549ba Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 13:54:46 -0700 Subject: llvmpipe: add scene texture referencing code --- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++++++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup.h | 2 +- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 61b968c49f..bac2db92ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -432,25 +432,22 @@ lp_setup_set_vertex_info( struct setup_context *setup, } +/** + * Called during state validation when LP_NEW_TEXTURE is set. + */ void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture) { - struct pipe_texture *dummy; unsigned i; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - assert(num <= PIPE_MAX_SAMPLERS); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_texture *tex = i < num ? texture[i] : NULL; - /* FIXME: hold on to the reference */ - dummy = NULL; - pipe_texture_reference(&dummy, tex); - if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex; @@ -463,21 +460,37 @@ lp_setup_set_sampler_textures( struct setup_context *setup, else /* FIXME: map the rendertarget */ assert(0); + + /* the scene references this texture */ + { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + lp_scene_texture_reference(scene, tex); + } } } setup->dirty |= LP_SETUP_NEW_FS; } + +/** + * Is the given texture referenced in the setup module's current scene? 
+ */ boolean -lp_setup_is_texture_referenced( struct setup_context *setup, +lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { - /* FIXME */ + const struct lp_scene *scene = setup->scene; + if (scene && lp_scene_is_textured_referenced(scene, texture)) { + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } +/** + * Called by vbuf code when we're about to draw something. + */ void lp_setup_update_state( struct setup_context *setup ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bac7d73e8d..429abeba43 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -118,7 +118,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); boolean -lp_setup_is_texture_referenced( struct setup_context *setup, +lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ); void -- cgit v1.2.3 From d59fe448967addb3025d7df90888ff950e03a343 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 14:51:26 -0700 Subject: llvmpipe: check for texture usage in all scenes --- src/gallium/drivers/llvmpipe/lp_setup.c | 20 ++++++++++---------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 ++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index bac2db92ba..8193b107d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -50,10 +50,6 @@ #include "draw/draw_vbuf.h" -/** XXX temporary value, temporary here */ -#define MAX_SCENES 2 - - static void set_scene_state( struct setup_context *, unsigned ); @@ -474,15 +470,19 @@ lp_setup_set_sampler_textures( struct setup_context *setup, /** - * Is the given texture referenced in the setup module's current scene? 
+ * Is the given texture referenced by any scene? + * Note: we have to check all scenes including any scenes currently + * being rendered and the current scene being built. */ boolean lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { - const struct lp_scene *scene = setup->scene; - if (scene && lp_scene_is_textured_referenced(scene, texture)) { - return PIPE_REFERENCED_FOR_READ; + unsigned i; + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_textured_referenced(setup->scenes[i], texture)) { + return PIPE_REFERENCED_FOR_READ; + } } return PIPE_UNREFERENCED; } @@ -645,8 +645,8 @@ lp_setup_create( struct pipe_screen *screen, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - struct lp_scene *scene = lp_scene_create(); - lp_scene_enqueue(setup->empty_scenes, scene); + setup->scenes[i] = lp_scene_create(); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } setup->triangle = first_triangle; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 66654ec5e7..e6f6f0e0bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -50,6 +50,11 @@ struct lp_scene_queue; +/** Max number of scenes */ +#define MAX_SCENES 2 + + + /** * Point/line/triangle setup context. 
* Note: "stored" below indicates data which is stored in the bins, @@ -75,6 +80,7 @@ struct setup_context */ struct draw_stage *vbuf; struct lp_rasterizer *rast; + struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ -- cgit v1.2.3 From db58192cfb63cbb7b1d84e7ae7429799ce888164 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:01:35 -0700 Subject: llvmpipe: re-get scene pointer after flushing --- src/gallium/drivers/llvmpipe/lp_setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8193b107d9..4f77d04ca5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -249,6 +249,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_scene_state( setup, SETUP_FLUSHED ); + /* re-get scene pointer, may have a new scene after flushing */ + scene = lp_setup_get_current_scene(setup); + util_copy_framebuffer_state(&setup->fb, fb); lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); -- cgit v1.2.3 From 0b279c5382da021a71cdc8ed3afa09983817539c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:03:42 -0700 Subject: llvmpipe: indentation fixes --- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4f77d04ca5..11b1b5f319 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -292,13 +292,13 @@ lp_setup_clear( struct setup_context *setup, */ if (flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); + lp_rast_clear_color, + setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) 
lp_scene_bin_everywhere( scene, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try -- cgit v1.2.3 From 12872774461a84f0a7c272aff5aac5e30a78a7c2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:30:42 -0700 Subject: llvmpipe: also check render target textures in lp_setup_is_texture_referenced() --- src/gallium/drivers/llvmpipe/lp_setup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 11b1b5f319..ce006bf618 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -487,6 +487,15 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, return PIPE_REFERENCED_FOR_READ; } } + + /* check the render targets */ + for (i = 0; i < setup->fb.nr_cbufs; i++) { + if (setup->fb.cbufs[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + } + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return PIPE_UNREFERENCED; } -- cgit v1.2.3 From 018b78ad649e88cc6d8b6b10aef1502075508515 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:32:55 -0700 Subject: llvmpipe: check render targets before other textures --- src/gallium/drivers/llvmpipe/lp_setup.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ce006bf618..649e97992b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -482,19 +482,22 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { unsigned i; - for (i = 0; i < Elements(setup->scenes); i++) { - if 
(lp_scene_is_textured_referenced(setup->scenes[i], texture)) { - return PIPE_REFERENCED_FOR_READ; - } - } /* check the render targets */ for (i = 0; i < setup->fb.nr_cbufs; i++) { if (setup->fb.cbufs[i]->texture == texture) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } - if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + } + + /* check textures referenced by the scene */ + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_textured_referenced(setup->scenes[i], texture)) { + return PIPE_REFERENCED_FOR_READ; + } + } return PIPE_UNREFERENCED; } -- cgit v1.2.3 From f19f218e7aad76639a6aacabda8101ba87bb4896 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:01:45 -0700 Subject: llvmpipe: minor assorted clean-ups --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0f5b25b725..39ad983a35 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -36,6 +36,7 @@ #define NUM_CHANNELS 4 + /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ @@ -45,10 +46,11 @@ static void constant_coef( struct lp_rast_triangle *tri, unsigned i ) { tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0; - tri->inputs.dady[slot][i] = 0; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; } + /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. 
@@ -184,8 +186,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* Allocate space for the a0, dadx and dady arrays */ { - unsigned bytes; - bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); + unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); @@ -281,7 +282,7 @@ do_triangle_ccw(struct setup_context *setup, * * XXX: subject to overflow?? */ - if (area <= 0) { + if (area <= 0.0f) { lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -306,8 +307,7 @@ do_triangle_ccw(struct setup_context *setup, */ setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); - /* half-edge constants, will be interated over the whole - * rendertarget. + /* half-edge constants, will be interated over the whole render target. */ tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; @@ -494,6 +494,7 @@ do_triangle_ccw(struct setup_context *setup, } } + static void triangle_cw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -502,6 +503,7 @@ static void triangle_cw( struct setup_context *setup, do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); } + static void triangle_ccw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -510,6 +512,7 @@ static void triangle_ccw( struct setup_context *setup, do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); } + static void triangle_both( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -522,12 +525,13 @@ static void triangle_both( struct setup_context *setup, const float fy = v1[0][1] - v2[0][1]; /* det = cross(e,f).z */ - if (ex * fy - ey * fx < 0) + if (ex * fy - ey * fx < 0.0f) triangle_ccw( setup, v0, v1, v2 ); else triangle_cw( setup, v0, v1, v2 ); } + static void 
triangle_nop( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -554,5 +558,3 @@ lp_setup_choose_triangle( struct setup_context *setup ) break; } } - - -- cgit v1.2.3 From f94a99170ecdc3286408b3628fbae9f45518007e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:54:48 -0700 Subject: llvmpipe: optimize lp_rast_clear_color() for non-gray colors This makes a big difference in progs that clear to a non-gray color. Some demos are 30-50% faster. --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6c7ece9fdb..3849116758 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -187,17 +187,33 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { + /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.fb.nr_cbufs; i++) { memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } else { - unsigned x, y, chan; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan]; + /* Non-gray color. + * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code + * will need to change. It'll be pretty obvious when clearing no longer + * works. 
+ */ + const unsigned chunk = TILE_SIZE / 4; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + uint8_t *c = color_tile[i]; + unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { + memset(c, clear_color[0], chunk); + c += chunk; + memset(c, clear_color[1], chunk); + c += chunk; + memset(c, clear_color[2], chunk); + c += chunk; + memset(c, clear_color[3], chunk); + c += chunk; + } + assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); + } } } -- cgit v1.2.3 From 2ba1c8189a124932b7b35115caf8f442bf4a7125 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:58:38 -0700 Subject: llvmpipe: use one loop in lp_rast_clear_zstencil() This is just a tiny bit faster. --- src/gallium/drivers/llvmpipe/lp_rast.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3849116758..75562bf62d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -226,14 +226,13 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg) { - unsigned i, j; + unsigned i; uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - for (i = 0; i < TILE_SIZE; i++) - for (j = 0; j < TILE_SIZE; j++) - depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; + for (i = 0; i < TILE_SIZE * TILE_SIZE; i++) + depth_tile[i] = arg.clear_zstencil; } -- cgit v1.2.3 From 4414a1a73ca649df12b514daa82381a2dbde2ba4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 14:19:16 -0700 Subject: llvmpipe: assorted clean-ups in texture code --- src/gallium/drivers/llvmpipe/lp_context.c | 1 - src/gallium/drivers/llvmpipe/lp_texture.c | 53 +++++++++++++++---------------- src/gallium/drivers/llvmpipe/lp_texture.h | 8 ++--- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c 
b/src/gallium/drivers/llvmpipe/lp_context.c index 8d965175f8..bd549d4028 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -166,7 +166,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); - llvmpipe_init_texture_funcs( llvmpipe ); /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a953e8845a..14f636e4ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -46,21 +46,19 @@ #include "lp_winsys.h" -/* Simple, maximally packed layout. - */ - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Simple, maximally packed layout. */ static boolean llvmpipe_texture_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct pipe_texture *pt = &lpt->base; unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { @@ -79,7 +77,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 
6 : depth) * lpt->stride[level]); - width = u_minify(width, 1); + width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } @@ -89,9 +87,11 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, return lpt->data != NULL; } + + static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct llvmpipe_winsys *winsys = screen->winsys; @@ -106,9 +106,6 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, } - - - static struct pipe_texture * llvmpipe_texture_create(struct pipe_screen *_screen, const struct pipe_texture *templat) @@ -125,7 +122,7 @@ llvmpipe_texture_create(struct pipe_screen *_screen, /* XXX: The xlib state tracker is brain-dead and will request * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. */ - if(lpt->base.format == PIPE_FORMAT_Z16_UNORM) + if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) lpt->base.format = PIPE_FORMAT_Z32_UNORM; if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | @@ -189,12 +186,15 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) struct llvmpipe_screen *screen = llvmpipe_screen(pt->screen); struct llvmpipe_texture *lpt = llvmpipe_texture(pt); - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpt->dt); } - else + else { + /* regular texture */ align_free(lpt->data); + } FREE(lpt); } @@ -356,7 +356,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, lpt = llvmpipe_texture(transfer->texture); format = lpt->base.format; - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; map = winsys->displaytarget_map(winsys, lpt->dt, @@ -364,14 +365,15 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if (map == NULL) return NULL; } - else + else { + /* regular texture */ map = lpt->data; + } /* May want to different things here 
depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) - { + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. */ screen->timestamp++; @@ -386,28 +388,23 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, static void -llvmpipe_transfer_unmap(struct pipe_screen *_screen, +llvmpipe_transfer_unmap(struct pipe_screen *screen, struct pipe_transfer *transfer) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); struct llvmpipe_texture *lpt; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); - if(lpt->dt) { - struct llvmpipe_winsys *winsys = screen->winsys; + if (lpt->dt) { + /* display target */ + struct llvmpipe_winsys *winsys = lp_screen->winsys; winsys->displaytarget_unmap(winsys, lpt->dt); } } -void -llvmpipe_init_texture_funcs(struct llvmpipe_context *lp) -{ -} - - void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index e37ef6059a..87c905bc02 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -37,6 +37,7 @@ struct pipe_screen; struct llvmpipe_context; struct llvmpipe_displaytarget; + struct llvmpipe_texture { struct pipe_texture base; @@ -58,6 +59,7 @@ struct llvmpipe_texture unsigned timestamp; }; + struct llvmpipe_transfer { struct pipe_transfer base; @@ -73,6 +75,7 @@ llvmpipe_texture(struct pipe_texture *pt) return (struct llvmpipe_texture *) pt; } + static INLINE const struct llvmpipe_texture * llvmpipe_texture_const(const struct pipe_texture *pt) { @@ -87,11 +90,8 @@ llvmpipe_transfer(struct pipe_transfer *pt) } -extern void -llvmpipe_init_texture_funcs( struct llvmpipe_context *llvmpipe ); - extern void llvmpipe_init_screen_texture_funcs(struct 
pipe_screen *screen); -#endif /* LP_TEXTURE */ +#endif /* LP_TEXTURE_H */ -- cgit v1.2.3 From dfd6e762e70aef6694fa3baedf8d423b08995233 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 16:21:49 -0700 Subject: llvmpipe: remove redundant code in llvmpipe_set_blend_color() --- src/gallium/drivers/llvmpipe/lp_state_blend.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index a10c5918df..9b950e82d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -80,9 +80,6 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) return; - if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) - return; - draw_flush(llvmpipe->draw); memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); -- cgit v1.2.3 From 4f5675e94b936d012b89937aac8a16c28143d5ec Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:04:40 -0700 Subject: gallium/util: added debug_dump_texture() and ppm output Now we can dump debug images on Linux too. --- src/gallium/auxiliary/util/u_debug.c | 75 ++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_debug.h | 4 ++ 2 files changed, 79 insertions(+) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 9b4e6ca2a7..7ee0864d29 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -631,6 +631,14 @@ const char *u_prim_name( unsigned prim ) #ifdef DEBUG +/** + * Dump an image to a .raw or .ppm file (depends on OS).
+ * \param format PIPE_FORMAT_x + * \param cpp bytes per pixel + * \param width width in pixels + * \param height height in pixels + * \param stride row stride in bytes + */ void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -672,6 +680,52 @@ void debug_dump_image(const char *prefix, } EngUnmapFile(iFile); +#elif defined(PIPE_OS_UNIX) + /* write a ppm file */ + char filename[256]; + FILE *f; + + util_snprintf(filename, sizeof(filename), "%s.ppm", prefix); + + f = fopen(filename, "w"); + if (f) { + int i, x, y; + int r, g, b; + const uint8_t *ptr = (uint8_t *) data; + + /* XXX this is a hack */ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = 2; + g = 1; + b = 0; + break; + default: + r = 0; + g = 1; + b = 1; + } + + fprintf(f, "P6\n"); + fprintf(f, "# ppm-file created by osdemo.c\n"); + fprintf(f, "%i %i\n", width, height); + fprintf(f, "255\n"); + fclose(f); + + f = fopen(filename, "ab"); /* reopen in binary append mode */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + i = y * stride + x * cpp; + fputc(ptr[i + r], f); /* write red */ + fputc(ptr[i + g], f); /* write green */ + fputc(ptr[i + b], f); /* write blue */ + } + } + fclose(f); + } + else { + fprintf(stderr, "Can't open %s for writing\n", filename); + } #endif } @@ -712,6 +766,27 @@ error: } +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture) +{ + struct pipe_surface *surface; + struct pipe_screen *screen; + + if (!texture) + return; + + screen = texture->screen; + + /* XXX for now, just dump image for face=0, level=0 */ + surface = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + if (surface) { + debug_dump_surface(prefix, surface); + screen->tex_surface_destroy(surface); + } +} + + #pragma pack(push,2) struct bmp_file_header { uint16_t bfType; diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 
facc30a553..131c991539 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -354,6 +354,8 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG struct pipe_surface; struct pipe_transfer; +struct pipe_texture; + void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -361,6 +363,8 @@ void debug_dump_image(const char *prefix, const void *data); void debug_dump_surface(const char *prefix, struct pipe_surface *surface); +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture); void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface); void debug_dump_transfer_bmp(const char *filename, -- cgit v1.2.3 From 16ecd2f0780c7b9123aa50d5a174c127a1e408b2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:05:55 -0700 Subject: llvmpipe: clean-up, fixing up frame dump/debug code --- src/gallium/drivers/llvmpipe/lp_flush.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 16fb00092e..07f32848c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -74,16 +74,21 @@ llvmpipe_flush( struct pipe_context *pipe, /* Enable to dump BMPs of the color/depth buffers each frame */ #if 0 - if(flags & PIPE_FLUSH_FRAME) { + if (flags & PIPE_FLUSH_FRAME) { static unsigned frame_no = 1; - static char filename[256]; + char filename[256]; unsigned i; - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%u.bmp", i, frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[i]); + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + util_snprintf(filename, 
sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); } - util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf); + ++frame_no; } #endif -- cgit v1.2.3 From ca12e30d97b83fb33e1f8f83da05b5ed2809b0af Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:08:19 -0700 Subject: llvmpipe: minor comment update --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 39ad983a35..716b88073e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -131,7 +131,7 @@ static void perspective_coef( struct lp_rast_triangle *tri, /** * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. + * X and Y are trivial * Z and W are copied from position_coef which should have already been computed. * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ -- cgit v1.2.3 From 4461442849bfdb817334b38567136f7f9dabdf59 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:15:00 -0700 Subject: llvmpipe: implement scissor testing The scissor test is implemented as another per-quad operation in the JIT code. The four scissor box params are passed via the lp_jit_context. In the JIT code we compare the quad's x/y coords against the clip bounds and create a new in/out mask that's AND'd with the main quad mask. Note: we should also do scissor testing in the triangle setup code to improve efficiency. That's not done yet. 
--- src/gallium/drivers/llvmpipe/lp_jit.c | 21 +++++++--- src/gallium/drivers/llvmpipe/lp_jit.h | 19 ++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 34 ++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup.h | 4 ++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +++ src/gallium/drivers/llvmpipe/lp_state.h | 1 + src/gallium/drivers/llvmpipe/lp_state_derived.c | 4 ++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 52 +++++++++++++++++++++++++ 8 files changed, 134 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 4ef0783f3e..429cb973c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,13 +79,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[8]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ elem_types[2] = LLVMFloatType(); /* scissor_xmin */ + elem_types[3] = LLVMFloatType(); /* scissor_ymin */ + elem_types[4] = LLVMFloatType(); /* scissor_xmax */ + elem_types[5] = LLVMFloatType(); /* scissor_ymax */ + elem_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[7] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -93,8 +96,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, screen->target, context_type, 1); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin, 
screen->target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, + screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, + screen->target, context_type, 4); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, + screen->target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + screen->target, context_type, 6); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 3b316914b0..9cbe1bd3b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -79,6 +79,9 @@ struct lp_jit_context float alpha_ref_value; + /** floats, not ints */ + float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; + /* FIXME: store (also?) in floats */ uint8_t *blend_color; @@ -92,10 +95,22 @@ struct lp_jit_context #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") +#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "scissor_xmin") + +#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 3, "scissor_ymin") + +#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 4, "scissor_xmax") + +#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 5, "scissor_ymax") + #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "blend_color") + lp_build_struct_get(_builder, _ptr, 6, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 7 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") diff --git 
a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 649e97992b..284337e825 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -413,6 +413,21 @@ lp_setup_set_blend_color( struct setup_context *setup, } +void +lp_setup_set_scissor( struct setup_context *setup, + const struct pipe_scissor_state *scissor ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(scissor); + + if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) { + setup->scissor.current = *scissor; /* struct copy */ + setup->dirty |= LP_SETUP_NEW_SCISSOR; + } +} + + void lp_setup_set_flatshade_first( struct setup_context *setup, boolean flatshade_first ) @@ -534,6 +549,25 @@ lp_setup_update_state( struct setup_context *setup ) setup->dirty |= LP_SETUP_NEW_FS; } + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + float *stored; + + stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); + + stored[0] = (float) setup->scissor.current.minx; + stored[1] = (float) setup->scissor.current.miny; + stored[2] = (float) setup->scissor.current.maxx; + stored[3] = (float) setup->scissor.current.maxy; + + setup->scissor.stored = stored; + + setup->fs.current.jit_context.scissor_xmin = stored[0]; + setup->fs.current.jit_context.scissor_ymin = stored[1]; + setup->fs.current.jit_context.scissor_xmax = stored[2]; + setup->fs.current.jit_context.scissor_ymax = stored[3]; + + setup->dirty |= LP_SETUP_NEW_FS; + } if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_buffer *buffer = setup->constants.current; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 429abeba43..c7ef3d394a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -113,6 +113,10 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ); +void +lp_setup_set_scissor( struct setup_context *setup, + 
const struct pipe_scissor_state *scissor ); + void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index e6f6f0e0bb..fc0aef1376 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -45,6 +45,7 @@ #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 #define LP_SETUP_NEW_BLEND_COLOR 0x04 +#define LP_SETUP_NEW_SCISSOR 0x08 struct lp_scene_queue; @@ -122,6 +123,11 @@ struct setup_context uint8_t *stored; } blend_color; + struct { + struct pipe_scissor_state current; + const void *stored; + } scissor; + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 4c6747bb2b..ddb152c074 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -72,6 +72,7 @@ struct lp_fragment_shader_variant_key enum pipe_format zsbuf_format; unsigned nr_cbufs:8; unsigned flatshade:1; + unsigned scissor:1; struct { ubyte colormask; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 2c349fdb1d..28af477914 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -160,6 +160,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | + LP_NEW_SCISSOR | LP_NEW_DEPTH_STENCIL_ALPHA | LP_NEW_RASTERIZER | LP_NEW_SAMPLER | @@ -170,6 +171,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + if (llvmpipe->dirty & LP_NEW_SCISSOR) + lp_setup_set_scissor(llvmpipe->setup, &llvmpipe->scissor); + if (llvmpipe->dirty & 
LP_NEW_DEPTH_STENCIL_ALPHA) lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 26a2d6cc23..d12d3f6091 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -304,6 +304,51 @@ generate_tri_edge_mask(LLVMBuilderRef builder, } +static LLVMValueRef +generate_scissor_test(LLVMBuilderRef builder, + LLVMValueRef context_ptr, + const struct lp_build_interp_soa_context *interp, + struct lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; + LLVMValueRef xmin, ymin, xmax, ymax; + LLVMValueRef m0, m1, m2, m3, m; + + /* xpos, ypos contain the window coords for the four pixels in the quad */ + assert(xpos); + assert(ypos); + + /* get the current scissor bounds, convert to vectors */ + xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); + xmin = lp_build_broadcast(builder, vec_type, xmin); + + ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); + ymin = lp_build_broadcast(builder, vec_type, ymin); + + xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); + xmax = lp_build_broadcast(builder, vec_type, xmax); + + ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); + ymax = lp_build_broadcast(builder, vec_type, ymax); + + /* compare the fragment's position coordinates against the scissor bounds */ + m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); + m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); + m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); + m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + + /* AND all the masks together */ + m = LLVMBuildAnd(builder, m0, m1, ""); + m = LLVMBuildAnd(builder, m, m2, ""); + m = LLVMBuildAnd(builder, m, m3, ""); + + lp_build_name(m, "scissormask"); + + 
return m; +} + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. * \param i which quad in the tile, in range [0,3] @@ -372,6 +417,11 @@ generate_fs(struct llvmpipe_context *lp, /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); + if (key->scissor) { + LLVMValueRef smask = + generate_scissor_test(builder, context_ptr, interp, type); + lp_build_mask_update(&mask, smask); + } early_depth_test = key->depth.enabled && @@ -968,6 +1018,7 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; + key->scissor = lp->rasterizer->scissor; if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); @@ -1033,6 +1084,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) key.blend.colormask == 0xf && !key.alpha.enabled && !key.depth.enabled && + !key.scissor && !shader->info.uses_kill ? TRUE : FALSE; -- cgit v1.2.3 From 8b29d39e99185d94a1fddafbe8b2ab0856fcdcc0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:22:36 -0700 Subject: progs/trival: updated tri-scissor-tri.c test The scissor left/right/bottom/top bounds can be moved by pressing the l/r/b/t and L/R/B/T keys. 
--- progs/trivial/tri-scissor-tri.c | 58 +++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/progs/trivial/tri-scissor-tri.c b/progs/trivial/tri-scissor-tri.c index d65502d91b..1bb15501bb 100644 --- a/progs/trivial/tri-scissor-tri.c +++ b/progs/trivial/tri-scissor-tri.c @@ -31,10 +31,14 @@ #define CI_OFFSET_1 16 #define CI_OFFSET_2 32 -GLint Width = 250, Height = 250; +GLint Width = 300, Height = 300; GLenum doubleBuffer; +/* scissor bounds */ +static GLint Left, Right, Bottom, Top; + + static void Init(void) { fprintf(stderr, "GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); @@ -47,26 +51,57 @@ static void Init(void) static void Reshape(int width, int height) { - glViewport(0, 0, (GLint)width, (GLint)height); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); glMatrixMode(GL_MODELVIEW); + + Width = width; + Height = height; + + Left = Width / 4; + Right = Width * 3 / 4; + Bottom = Height / 4; + Top = Height * 3 / 4; } static void Key(unsigned char key, int x, int y) { + int step = 2; + switch (key) { + case 'l': + Left -= step; + break; + case 'L': + Left += step; + break; + case 'r': + Right -= step; + break; + case 'R': + Right += step; + break; + case 'b': + Bottom -= step; + break; + case 'B': + Bottom += step; + break; + case 't': + Top -= step; + break; + case 'T': + Top += step; + break; + case 27: + exit(1); + default: + ; + } - switch (key) { - case 27: - exit(1); - default: - break; - } - - glutPostRedisplay(); + glutPostRedisplay(); } static void Draw(void) @@ -82,7 +117,8 @@ static void Draw(void) glVertex3f(-0.9, 0.0, -30.0); glEnd(); - glScissor(Width / 4, Height / 4, Width / 2, Height / 2); + printf("Scissor %d, %d .. 
%d, %d\n", Left, Bottom, Right, Top); + glScissor(Left, Bottom, Right-Left, Top-Bottom); glEnable(GL_SCISSOR_TEST); glBegin(GL_TRIANGLES); -- cgit v1.2.3 From f97eeeec6b9f8a979f9452e2dfdab86ccb058b64 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 09:16:26 -0700 Subject: llvmpipe: add extra flags to DEFINES, no CFLAGS Doesn't make any real difference but -D flags are put into DEFINES everywhere else. --- src/gallium/drivers/llvmpipe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 264999a7ce..3d8d88179a 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,7 +3,7 @@ include $(TOP)/configs/current LIBNAME = llvmpipe -CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ lp_scene.c \ -- cgit v1.2.3 From 345178a7fc793c7789750de9f5e6d89f8e03af7c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 10:04:16 -0700 Subject: llvmpipe: put labels on some value refs --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 5ee8d556a6..854dd0b28c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -172,7 +172,7 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %s\n", + _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); @@ -201,9 
+201,13 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_ifloor(&bld->coord_bld, s); y = lp_build_ifloor(&bld->coord_bld, t); + lp_build_name(x, "tex.x.floor"); + lp_build_name(y, "tex.y.floor"); x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + lp_build_name(x, "tex.x.wrapped"); + lp_build_name(y, "tex.y.wrapped"); lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } -- cgit v1.2.3 From 3b1920a34903dfb753bc2a0461fef204d39846c6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 10:25:59 -0700 Subject: llvmpipe: asst code changes in lp_state_fs.c --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 59 ++++++++++++++++-------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d12d3f6091..f15fca293b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -596,6 +596,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; + LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; @@ -652,30 +653,33 @@ generate_fragment(struct llvmpipe_context *lp, func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - variant->function = LLVMAddFunction(screen->module, "shader", func_type); - LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + function = LLVMAddFunction(screen->module, "shader", func_type); + LLVMSetFunctionCallConv(function, LLVMCCallConv); + + variant->function = function; + /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? 
*/ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); - - context_ptr = LLVMGetParam(variant->function, 0); - x = LLVMGetParam(variant->function, 1); - y = LLVMGetParam(variant->function, 2); - a0_ptr = LLVMGetParam(variant->function, 3); - dadx_ptr = LLVMGetParam(variant->function, 4); - dady_ptr = LLVMGetParam(variant->function, 5); - color_ptr_ptr = LLVMGetParam(variant->function, 6); - depth_ptr = LLVMGetParam(variant->function, 7); - c0 = LLVMGetParam(variant->function, 8); - c1 = LLVMGetParam(variant->function, 9); - c2 = LLVMGetParam(variant->function, 10); - step0_ptr = LLVMGetParam(variant->function, 11); - step1_ptr = LLVMGetParam(variant->function, 12); - step2_ptr = LLVMGetParam(variant->function, 13); + LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(function, 0); + x = LLVMGetParam(function, 1); + y = LLVMGetParam(function, 2); + a0_ptr = LLVMGetParam(function, 3); + dadx_ptr = LLVMGetParam(function, 4); + dady_ptr = LLVMGetParam(function, 5); + color_ptr_ptr = LLVMGetParam(function, 6); + depth_ptr = LLVMGetParam(function, 7); + c0 = LLVMGetParam(function, 8); + c1 = LLVMGetParam(function, 9); + c2 = LLVMGetParam(function, 10); + step0_ptr = LLVMGetParam(function, 11); + step1_ptr = LLVMGetParam(function, 12); + step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -696,7 +700,7 @@ generate_fragment(struct llvmpipe_context *lp, * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); + block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); @@ -788,33 +792,30 @@ generate_fragment(struct llvmpipe_context *lp, /* Verify the LLVM IR. 
If invalid, dump and abort */ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) - LLVMDumpValue(variant->function); + LLVMDumpValue(function); abort(); } #endif /* Apply optimizations to LLVM IR */ if (1) - LLVMRunFunctionPassManager(screen->pass, variant->function); + LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { /* Print the LLVM IR to stderr */ - LLVMDumpValue(variant->function); + LLVMDumpValue(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); + variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) lp_disassemble(variant->jit_function); - - variant->next = shader->variants; - shader->variants = variant; } @@ -888,6 +889,10 @@ generate_variant(struct llvmpipe_context *lp, generate_fragment(lp, shader, variant); + /* insert new variant into linked list */ + variant->next = shader->variants; + shader->variants = variant; + return variant; } -- cgit v1.2.3 From 2797f2bf57562c95a601a67edca3089641215cc4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:21:16 -0700 Subject: llvmpipe: generate two shader varients, one omits triangle in/out testing When we know that a 4x4 pixel block is entirely inside of a triangle use the jit function which omits the in/out test code. Results in a few percent speedup in many tests. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 52 +++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 11 ++---- src/gallium/drivers/llvmpipe/lp_setup.c | 12 +++--- src/gallium/drivers/llvmpipe/lp_setup.h | 7 ++-- src/gallium/drivers/llvmpipe/lp_state.h | 4 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 59 ++++++++++++++++++++++------- 8 files changed, 142 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 75562bf62d..d03ba1752d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -344,9 +344,6 @@ void lp_rast_set_state( struct lp_rasterizer *rast, -/* Within a tile: - */ - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. @@ -356,8 +353,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; @@ -365,16 +362,35 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - /* Use the existing preference for 4x4 (four quads) shading: - */ - for (y = 0; y < TILE_SIZE; y += 4) - for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, - thread_index, - inputs, - tile_x + x, - tile_y + y, - c1, c2, c3); + /* render the whole 64x64 tile in 4x4 chunks */ + for (y = 0; y < TILE_SIZE; y += 4){ + for (x = 0; x < 
TILE_SIZE; x += 4) { + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + uint32_t *depth; + unsigned block_offset, i; + + /* offset of the 16x16 pixel block within the tile */ + block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); + } + } } @@ -411,7 +427,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, iy = y % TILE_SIZE; /* offset of the 16x16 pixel block within the tile */ - block_offset = ((iy/4)*(16*16) + (ix/4)*16); + block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ for (i = 0; i < rast->state.fb.nr_cbufs; i++) @@ -433,7 +449,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, #endif /* run shader */ - state->jit_function( &state->jit_context, + state->jit_function[1]( &state->jit_context, x, y, inputs->a0, inputs->dadx, @@ -445,8 +461,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } -/* End of tile: - */ /** diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index d926adb6b2..2a97fe4c67 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -66,8 +66,10 @@ struct lp_rast_state { /* The shader itself. 
Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: - */ - lp_jit_frag_func jit_function; + * jit_function[0] skips the triangle in/out test code + * jit_function[1] does triangle in/out testing + */ + lp_jit_frag_func jit_function[2]; boolean opaque; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5afdeab049..607968e345 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,7 @@ #include "pipe/p_thread.h" #include "lp_rast.h" +#include "lp_tile_soa.h" #define MAX_THREADS 8 /* XXX probably temporary here */ @@ -126,4 +127,46 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3); + +/** + * Shade all pixels in a 4x4 block. The fragment code omits the + * triangle in/out tests. + * \param x, y location of 4x4 block in window coords + */ +static INLINE void +lp_rast_shade_quads_all( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y ) +{ + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + void *depth; + unsigned block_offset, i; + + /* offset of the containing 16x16 pixel block within the tile */ + block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); +} + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c 
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index bc7397f50c..9c3f699ec7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,13 +89,10 @@ block_full_4( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y ) { - /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; - lp_rast_shade_quads(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y, - c1, c2, c3); + lp_rast_shade_quads_all(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 284337e825..355c051837 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -362,14 +362,16 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function, - boolean opaque ) +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ) { - LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0); /* FIXME: reference count */ - setup->fs.current.jit_function = jit_function; + setup->fs.current.jit_function[0] = jit_function0; + setup->fs.current.jit_function[1] = jit_function1; setup->fs.current.opaque = opaque; setup->dirty |= LP_SETUP_NEW_FS; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index c7ef3d394a..407f752777 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -96,9 +96,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function, - 
boolean opaque ); +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ); void lp_setup_set_fs_constants(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index ddb152c074..224b6e523c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -88,9 +88,9 @@ struct lp_fragment_shader_variant struct lp_fragment_shader_variant_key key; - LLVMValueRef function; + LLVMValueRef function[2]; - lp_jit_frag_func jit_function; + lp_jit_frag_func jit_function[2]; struct lp_fragment_shader_variant *next; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f15fca293b..a8f4a4ed46 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -349,9 +349,26 @@ generate_scissor_test(LLVMBuilderRef builder, } +static LLVMValueRef +build_int32_vec_const(int value) +{ + struct lp_type i32_type; + + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per vector */ + return lp_build_int_const_scalar(i32_type, value); +} + + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
* \param i which quad in the tile, in range [0,3] + * \param do_tri_test if 1, do triangle edge in/out testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -366,6 +383,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef *pmask, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, + unsigned do_tri_test, LLVMValueRef c0, LLVMValueRef c1, LLVMValueRef c2, @@ -411,8 +429,13 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ - generate_tri_edge_mask(builder, i, pmask, - c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + if (do_tri_test) { + generate_tri_edge_mask(builder, i, pmask, + c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + } + else { + *pmask = build_int32_vec_const(~0); + } /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); @@ -563,7 +586,8 @@ generate_blend(const struct pipe_blend_state *blend, static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - struct lp_fragment_shader_variant *variant) + struct lp_fragment_shader_variant *variant, + unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; @@ -656,7 +680,7 @@ generate_fragment(struct llvmpipe_context *lp, function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - variant->function = function; + variant->function[do_tri_test] = function; /* XXX: need to propagate noalias down into color param now we are @@ -738,6 +762,7 @@ generate_fragment(struct llvmpipe_context *lp, &fs_mask[i], /* output */ out_color, depth_ptr_i, + do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); @@ -812,10 +837,10 @@ generate_fragment(struct llvmpipe_context *lp, /* * Translate the LLVM IR into machine code. 
*/ - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); + variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) - lp_disassemble(variant->jit_function); + lp_disassemble(variant->jit_function[do_tri_test]); } @@ -887,7 +912,8 @@ generate_variant(struct llvmpipe_context *lp, variant->shader = shader; memcpy(&variant->key, key, sizeof *key); - generate_fragment(lp, shader, variant); + generate_fragment(lp, shader, variant, 0); + generate_fragment(lp, shader, variant, 1); /* insert new variant into linked list */ variant->next = shader->variants; @@ -947,11 +973,15 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) variant = shader->variants; while(variant) { struct lp_fragment_shader_variant *next = variant->next; + unsigned i; - if(variant->function) { - if(variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, variant->function); - LLVMDeleteFunction(variant->function); + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } } FREE(variant); @@ -1093,7 +1123,8 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) !shader->info.uses_kill ? 
TRUE : FALSE; - lp_setup_set_fs_function(lp->setup, - shader->current->jit_function, - opaque); + lp_setup_set_fs_functions(lp->setup, + shader->current->jit_function[0], + shader->current->jit_function[1], + opaque); } -- cgit v1.2.3 From 4bef3575e605d890d9f228391b4724d27b025f49 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:25:13 -0700 Subject: llvmpipe: change 'in' to boolean, add comments --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 716b88073e..e7de6431d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -441,7 +441,7 @@ do_triangle_ccw(struct setup_context *setup, int cx1 = c1; int cx2 = c2; int cx3 = c3; - int in = 0; + boolean in = FALSE; /* are we inside the triangle? */ for (x = minx; x <= maxx; x++) { @@ -451,13 +451,13 @@ do_triangle_ccw(struct setup_context *setup, { /* do nothing */ if (in) - break; + break; /* exiting triangle, all done with this row */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = 1; + in = TRUE; /* triangle covers the whole tile- shade whole tile */ if(setup->fs.current.opaque) { lp_scene_bin_reset( scene, x, y ); @@ -471,7 +471,7 @@ do_triangle_ccw(struct setup_context *setup, } else { - in = 1; + in = TRUE; /* shade partial tile */ lp_scene_bin_command( scene, x, y, lp_rast_triangle, -- cgit v1.2.3 From 12ba9e99db51a4a9e2e28a0574ef59f6548d8a84 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:26:35 -0700 Subject: llvmpipe: added comment about lookup-tables vs. 
computation --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9c3f699ec7..92769beee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,6 +40,9 @@ * Map an index in [0,15] to an x,y position, multiplied by 4. * This is used to get the position of each subtile in a 4x4 * grid of edge step values. + * Note: we can use some bit twiddling to compute these values instead + * of using a look-up table, but there's no measurable performance + * difference. */ static const int pos_table4[16][2] = { { 0, 0 }, -- cgit v1.2.3 From 47fee146879aa8ac7f216c8ac5f3a84270266287 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:35:28 -0700 Subject: llvmpipe: skip 4x4 in/out test code It's a little faster to just do the in/out testing in the shader jit code. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 92769beee1..b3d1e7dee4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -149,10 +149,6 @@ do_block_16( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - const int ei1 = tri->ei1 * 4; - const int ei2 = tri->ei2 * 4; - const int ei3 = tri->ei3 * 4; - const int eo1 = tri->eo1 * 4; const int eo2 = tri->eo2 * 4; const int eo3 = tri->eo3 * 4; @@ -175,16 +171,10 @@ do_block_16( struct lp_rasterizer_task *rast_task, else { int px = x + pos_table4[i][0]; int py = y + pos_table4[i][1]; - if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { - /* the block is completely inside the triangle */ - block_full_4(rast_task, tri, px, py); - } - else { - /* the block is partially in/out of the triangle */ - do_block_4(rast_task, tri, px, 
py, cx1, cx2, cx3); - } + /* Don't bother testing if the 4x4 block is entirely in/out of + * the triangle. It's a little faster to do it in the jit code. + */ + do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); } } } -- cgit v1.2.3 From adb48d535082f5a311751e1866997e381b2d3038 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:52:13 -0700 Subject: llvmpipe: remove lp_rast_triangle::min/max fields These values aren't needed outside the do_triangle_ccw() function. --- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2a97fe4c67..21ebfa7ca9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -97,12 +97,6 @@ struct lp_rast_shader_inputs { * Objects of this type are put into the setup_context::data buffer. */ struct lp_rast_triangle { - /* bounding box of tri (in pixels) */ - int minx; - int maxx; - int miny; - int maxy; - /* one-pixel sized trivial accept offsets for each plane */ int ei1; int ei2; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e7de6431d3..ae354b3870 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -288,13 +288,13 @@ do_triangle_ccw(struct setup_context *setup, } /* Bounding rectangle (in pixels) */ - tri->minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxy = (MAX3(y1, 
y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - if (tri->miny == tri->maxy || - tri->minx == tri->maxx) { + if (miny == maxy || + minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -384,10 +384,10 @@ do_triangle_ccw(struct setup_context *setup, /* Convert to tile coordinates: */ - minx = tri->minx / TILE_SIZE; - miny = tri->miny / TILE_SIZE; - maxx = tri->maxx / TILE_SIZE; - maxy = tri->maxy / TILE_SIZE; + minx = minx / TILE_SIZE; + miny = miny / TILE_SIZE; + maxx = maxx / TILE_SIZE; + maxy = maxy / TILE_SIZE; /* Clamp maxx, maxy to framebuffer size */ -- cgit v1.2.3 From fdfe06ad804ea13e6e436d66c1bcafe0bde2f545 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 12:06:00 -0700 Subject: llvmpipe: implement scissor test in triangle setup --- src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- src/gallium/drivers/llvmpipe/lp_setup.h | 3 ++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 1 + src/gallium/drivers/llvmpipe/lp_setup_tri.c | 7 +++++++ src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 3 ++- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 355c051837..f52dce65d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -339,13 +339,15 @@ lp_setup_fence( struct setup_context *setup ) void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, - boolean ccw_is_frontface) + boolean ccw_is_frontface, + boolean scissor ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; + setup->scissor_test = scissor; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 407f752777..5081da29d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -88,7 +88,8 @@ 
lp_setup_bind_framebuffer( struct setup_context *setup, void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cullmode, - boolean front_is_ccw ); + boolean front_is_ccw, + boolean scissor ); void lp_setup_set_fs_inputs( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index fc0aef1376..a5fc34e54a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,7 @@ struct setup_context boolean flatshade_first; boolean ccw_is_frontface; + boolean scissor_test; unsigned cullmode; struct pipe_framebuffer_state fb; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index ae354b3870..018d254c76 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -293,6 +293,13 @@ do_triangle_ccw(struct setup_context *setup, miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + if (setup->scissor_test) { + minx = MAX2(minx, setup->scissor.current.minx); + maxx = MIN2(maxx, setup->scissor.current.maxx); + miny = MAX2(miny, setup->scissor.current.miny); + maxy = MIN2(maxy, setup->scissor.current.maxy); + } + if (miny == maxy || minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 7d4c310aae..feb012816c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -61,7 +61,8 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, if (llvmpipe->rasterizer) { lp_setup_set_triangle_state( llvmpipe->setup, llvmpipe->rasterizer->cull_mode, - llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW ); + llvmpipe->rasterizer->front_winding == 
PIPE_WINDING_CCW, + llvmpipe->rasterizer->scissor); } llvmpipe->dirty |= LP_NEW_RASTERIZER; -- cgit v1.2.3 From 591401ff05f878ff1607a1a34db1319103025d8f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 16 Jan 2010 21:12:10 +0000 Subject: llvmpipe: use new u_ringbuffer for scene queue --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_scene_queue.c | 114 ++++++++------------------ src/gallium/drivers/llvmpipe/lp_scene_queue.h | 8 +- src/gallium/drivers/llvmpipe/lp_setup.c | 11 ++- 4 files changed, 46 insertions(+), 89 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d03ba1752d..2e2ebee45d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -844,7 +844,7 @@ thread_func( void *init_data ) const struct pipe_framebuffer_state *fb; boolean write_depth; - rast->curr_scene = lp_scene_dequeue( rast->full_scenes ); + rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); lp_scene_bin_iter_begin( rast->curr_scene ); diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c index 8d65a6a6fa..43d74e4d89 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -32,8 +32,7 @@ * which are produced by the "rast" code when it finishes rendering a scene. */ - -#include "pipe/p_thread.h" +#include "util/u_ringbuffer.h" #include "util/u_memory.h" #include "lp_scene_queue.h" @@ -41,20 +40,17 @@ #define MAX_SCENE_QUEUE 4 +struct scene_packet { + struct util_packet header; + struct lp_scene *scene; +}; /** * A queue of scenes */ struct lp_scene_queue { - /** XXX might use a linked list here somedone, but the list will - * probably always be pretty short. 
- */ - struct lp_scene *scenes[MAX_SCENE_QUEUE]; - unsigned count; - - pipe_condvar count_change; - pipe_mutex mutex; + struct util_ringbuffer *ring; }; @@ -64,11 +60,19 @@ struct lp_scene_queue * lp_scene_queue_create(void) { struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue); - if (queue) { - pipe_condvar_init(queue->count_change); - pipe_mutex_init(queue->mutex); - } + if (queue == NULL) + return NULL; + + queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE * + sizeof( struct scene_packet ) / 4); + if (queue->ring == NULL) + goto fail; + return queue; + +fail: + FREE(queue); + return NULL; } @@ -76,41 +80,26 @@ lp_scene_queue_create(void) void lp_scene_queue_destroy(struct lp_scene_queue *queue) { - pipe_condvar_destroy(queue->count_change); - pipe_mutex_destroy(queue->mutex); + util_ringbuffer_destroy(queue->ring); + FREE(queue); } /** Remove first lp_scene from head of queue */ struct lp_scene * -lp_scene_dequeue(struct lp_scene_queue *queue) +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait) { - struct lp_scene *scene; - unsigned i; - - pipe_mutex_lock(queue->mutex); - while (queue->count == 0) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - - assert(queue->count >= 1); - - /* get head */ - scene = queue->scenes[0]; - - /* shift entries */ - for (i = 0; i < queue->count - 1; i++) { - queue->scenes[i] = queue->scenes[i + 1]; - } + struct scene_packet packet; + enum pipe_error ret; - queue->count--; + ret = util_ringbuffer_dequeue(queue->ring, + &packet.header, + sizeof packet / 4, + wait ); + if (ret != PIPE_OK) + return NULL; - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); - - return scene; + return packet.scene; } @@ -118,47 +107,16 @@ lp_scene_dequeue(struct lp_scene_queue *queue) void lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene) { - pipe_mutex_lock(queue->mutex); - - assert(queue->count < MAX_SCENE_QUEUE); + struct scene_packet 
packet; - /* debug: check that scene is not already in the queue */ - if (0) { - unsigned i; - for (i = 0; i < queue->count; i++) { - assert(queue->scenes[i] != scene); - } - } + packet.header.dwords = sizeof packet / 4; + packet.header.data24 = 0; + packet.scene = scene; - /* add to end */ - queue->scenes[queue->count++] = scene; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); + util_ringbuffer_enqueue(queue->ring, &packet.header); } -/** Return number of entries in the queue */ -unsigned -lp_scene_queue_count(struct lp_scene_queue *queue) -{ - unsigned count; - pipe_mutex_lock(queue->mutex); - count = queue->count; - pipe_mutex_unlock(queue->mutex); - return count; -} -/** Wait until the queue has exactly 'count' entries */ -void -lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned count) -{ - pipe_mutex_lock(queue->mutex); - while (queue->count != count) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - pipe_mutex_unlock(queue->mutex); -} + diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h index 1bd475fa50..fd7c65a2c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -40,16 +40,12 @@ void lp_scene_queue_destroy(struct lp_scene_queue *queue); struct lp_scene * -lp_scene_dequeue(struct lp_scene_queue *queue); +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait); void -lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *bins); +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene); -unsigned -lp_scene_queue_count(struct lp_scene_queue *queue); -void -lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned size); #endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f52dce65d7..d4a4724ad1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c 
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -57,8 +57,11 @@ struct lp_scene * lp_setup_get_current_scene(struct setup_context *setup) { if (!setup->scene) { - /* wait for a free/empty bin */ - setup->scene = lp_scene_dequeue(setup->empty_scenes); + + /* wait for a free/empty scene + */ + setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); + if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ lp_scene_set_framebuffer_size(setup->scene, @@ -651,8 +654,8 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); /* free the scenes in the 'empty' queue */ - while (lp_scene_queue_count(setup->empty_scenes) > 0) { - struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes); + while (1) { + struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE); if (!scene) break; lp_scene_destroy(scene); -- cgit v1.2.3 From 62623c4dc5d8b646942bc65e8de350e812945ad1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 13:10:10 -0700 Subject: llvmpipe: added show_tiles and show_subtiles debug options These options draw lines over the tiles to show the 64x64 tile bounds and 16x16 sub-tile bounds. For debugging/visualization. 
--- src/gallium/drivers/llvmpipe/lp_debug.h | 3 ++ src/gallium/drivers/llvmpipe/lp_rast.c | 60 ++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ 3 files changed, 65 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 74b2757494..7128e8eb4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -45,6 +45,9 @@ st_print_current(void); #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 #define DEBUG_JIT 0x100 +#define DEBUG_SHOW_TILES 0x200 +#define DEBUG_SHOW_SUBTILES 0x400 + #ifdef DEBUG extern int LP_DEBUG; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2e2ebee45d..440bb32235 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -461,6 +461,61 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } +#ifdef DEBUG +/** + * Set top row and left column of the tile's pixels to white. For debugging. + */ +static void +outline_tile(uint8_t *tile) +{ + const uint8_t val = 0xff; + unsigned i; + + for (i = 0; i < TILE_SIZE; i++) { + TILE_PIXEL(tile, i, 0, 0) = val; + TILE_PIXEL(tile, i, 0, 1) = val; + TILE_PIXEL(tile, i, 0, 2) = val; + TILE_PIXEL(tile, i, 0, 3) = val; + + TILE_PIXEL(tile, 0, i, 0) = val; + TILE_PIXEL(tile, 0, i, 1) = val; + TILE_PIXEL(tile, 0, i, 2) = val; + TILE_PIXEL(tile, 0, i, 3) = val; + } +} +#endif /* DEBUG */ + + +#ifdef DEBUG +/** + * Draw grid of gray lines at 16-pixel intervals across the tile to + * show the sub-tile boundaries. For debugging. 
+ */ +static void +outline_subtiles(uint8_t *tile) +{ + const uint8_t val = 0x80; + const unsigned step = 16; + unsigned i, j; + + for (i = 0; i < TILE_SIZE; i += 16) { + for (j = 0; j < TILE_SIZE; j++) { + TILE_PIXEL(tile, i, j, 0) = val; + TILE_PIXEL(tile, i, j, 1) = val; + TILE_PIXEL(tile, i, j, 2) = val; + TILE_PIXEL(tile, i, j, 3) = val; + + TILE_PIXEL(tile, j, i, 0) = val; + TILE_PIXEL(tile, j, i, 1) = val; + TILE_PIXEL(tile, j, i, 2) = val; + TILE_PIXEL(tile, j, i, 3) = val; + } + } + + outline_tile(tile); +} +#endif /* DEBUG */ + /** @@ -500,6 +555,11 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, thread_index, x, y, w, h); + if (LP_DEBUG & DEBUG_SHOW_SUBTILES) + outline_subtiles(rast->tasks[thread_index].tile.color[i]); + else if (LP_DEBUG & DEBUG_SHOW_TILES) + outline_tile(rast->tasks[thread_index].tile.color[i]); + lp_tile_write_4ub(transfer->texture->format, rast->tasks[thread_index].tile.color[i], rast->cbuf_map[i], diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index a28f6935b6..72f2e8ebf8 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -52,6 +52,8 @@ static const struct debug_named_value lp_debug_flags[] = { { "query", DEBUG_QUERY }, { "screen", DEBUG_SCREEN }, { "jit", DEBUG_JIT }, + { "show_tiles", DEBUG_SHOW_TILES }, + { "show_subtiles", DEBUG_SHOW_SUBTILES }, {NULL, 0} }; #endif -- cgit v1.2.3 From 89bb07730b1c0f292d1d70a99466e8a885fb87bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 14:35:43 -0700 Subject: util: fix broken util_ringbuffer_dequeue() The tests for an empty ring buffer were incorrect. Fixes glxinfo segfaults. Plus, add a new assertion. 
--- src/gallium/auxiliary/util/u_ringbuffer.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c index 3f43a19e01..e73ba0b348 100644 --- a/src/gallium/auxiliary/util/u_ringbuffer.c +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -53,11 +53,22 @@ void util_ringbuffer_destroy( struct util_ringbuffer *ring ) FREE(ring); } +/** + * Return number of free entries in the ring + */ static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) { return (ring->tail - (ring->head + 1)) & ring->mask; } +/** + * Is the ring buffer empty? + */ +static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring ) +{ + return util_ringbuffer_space(ring) == ring->mask; +} + void util_ringbuffer_enqueue( struct util_ringbuffer *ring, const struct util_packet *packet ) { @@ -67,6 +78,10 @@ void util_ringbuffer_enqueue( struct util_ringbuffer *ring, */ pipe_mutex_lock(ring->mutex); + /* make sure we don't request an impossible amount of space + */ + assert(packet->dwords <= ring->mask); + /* Wait for free space: */ while (util_ringbuffer_space(ring) < packet->dwords) @@ -104,14 +119,14 @@ enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, */ pipe_mutex_lock(ring->mutex); - /* Wait for free space: + /* Get next ring entry: */ if (wait) { - while (util_ringbuffer_space(ring) == 0) + while (util_ringbuffer_empty(ring)) pipe_condvar_wait(ring->change, ring->mutex); } else { - if (util_ringbuffer_space(ring) == 0) { + if (util_ringbuffer_empty(ring)) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } -- cgit v1.2.3 From 9a23d810be02edf740ce58196435cd6cdfd903c9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 17:39:54 -0700 Subject: llvmpipe: tweak a comment --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 4b51d6b964..ab545ed3de 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -666,7 +666,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ - arg_types[9] = LLVMInt32Type(); /* c1 */ + arg_types[9] = LLVMInt32Type(); /* c1 */ arg_types[10] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as int32_vec4[4]. -- cgit v1.2.3 From 75f262b8b441e05f5b8811db1c205220200d64ad Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:30:13 -0700 Subject: llvmpipe: updated comments --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 018d254c76..0d89bef606 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -440,8 +440,10 @@ do_triangle_ccw(struct setup_context *setup, int x, y; - /* Trivially accept or reject blocks, else jump to per-pixel - * examination above. + /* Test tile-sized blocks against the triangle. + * Discard blocks fully outside the tri. If the block is fully + * contained inside the tri, bin an lp_rast_shade_tile command. + * Else, bin a lp_rast_triangle command. 
*/ for (y = miny; y <= maxy; y++) { -- cgit v1.2.3 From 0fccfc9cc0cb7699598f1739d8cd3811175cdf13 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:30:44 -0700 Subject: llvmpipe: remove unneeded DEBUG checks, use step var --- src/gallium/drivers/llvmpipe/lp_rast.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 440bb32235..5fe939d234 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -461,7 +461,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } -#ifdef DEBUG /** * Set top row and left column of the tile's pixels to white. For debugging. */ @@ -483,10 +482,8 @@ outline_tile(uint8_t *tile) TILE_PIXEL(tile, 0, i, 3) = val; } } -#endif /* DEBUG */ -#ifdef DEBUG /** * Draw grid of gray lines at 16-pixel intervals across the tile to * show the sub-tile boundaries. For debugging. @@ -498,7 +495,7 @@ outline_subtiles(uint8_t *tile) const unsigned step = 16; unsigned i, j; - for (i = 0; i < TILE_SIZE; i += 16) { + for (i = 0; i < TILE_SIZE; i += step) { for (j = 0; j < TILE_SIZE; j++) { TILE_PIXEL(tile, i, j, 0) = val; TILE_PIXEL(tile, i, j, 1) = val; @@ -514,7 +511,6 @@ outline_subtiles(uint8_t *tile) outline_tile(tile); } -#endif /* DEBUG */ -- cgit v1.2.3 From 1073e39ab92a795f7b3958dd789ab324c82c00ae Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:45:51 -0700 Subject: llvmpipe: re-order file list, fix indentation --- src/gallium/drivers/llvmpipe/Makefile | 8 ++++---- src/gallium/drivers/llvmpipe/SConscript | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 3d8d88179a..71e7c2b5d9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,8 +6,6 @@ LIBNAME = llvmpipe DEFINES += -D__STDC_CONSTANT_MACROS 
-D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_scene.c \ - lp_scene_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ @@ -38,15 +36,17 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ + lp_query.c \ lp_rast.c \ lp_rast_tri.c \ + lp_scene.c \ + lp_scene_queue.c \ + lp_screen.c \ lp_setup.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ lp_setup_vbuf.c \ - lp_query.c \ - lp_screen.c \ lp_state_blend.c \ lp_state_clip.c \ lp_state_derived.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5af77c4a12..c4e7a4a22f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -32,14 +32,14 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', - 'lp_bld_format_query.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', 'lp_bld_logic.c', 'lp_bld_misc.cpp', - 'lp_bld_pack.c', - 'lp_bld_sample.c', + 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_swizzle.c', @@ -53,6 +53,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_query.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_scene.c', 'lp_scene_queue.c', 'lp_screen.c', @@ -71,8 +73,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_rast.c', - 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', -- cgit v1.2.3 From d8d80a8d74416bffd274d3b0597706374a0c1cc8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 11:58:43 -0700 Subject: llvmpipe: fix-up comment --- src/gallium/drivers/llvmpipe/lp_texture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 14f636e4ae..1c92d7f722 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -65,7 +65,8 @@ 
llvmpipe_texture_layout(struct llvmpipe_screen *screen, unsigned nblocksx, nblocksy; /* Allocate storage for whole quads. This is particularly important - * for depth surfaces, which are currently stored in a swizzled format. */ + * for depth surfaces, which are currently stored in a swizzled format. + */ nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); -- cgit v1.2.3 From ec459f2aeca39e51f495cde455ba18d0a9489caa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 16:58:25 -0700 Subject: llvmpipe: asst. task-related clean-ups --- src/gallium/drivers/llvmpipe/lp_rast.c | 51 +++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5fe939d234..05901d07aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -275,7 +275,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, assert(h <= TILE_SIZE); lp_tile_read_4ub(transfer->texture->format, - rast->tasks[thread_index].tile.color[i], + task->tile.color[i], rast->cbuf_map[i], transfer->stride, x, y, @@ -309,8 +309,9 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -323,7 +324,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_read_z32(rast->tasks[thread_index].tile.depth, + lp_tile_read_z32(task->tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, 
w, h); @@ -353,11 +354,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = rast->tasks[thread_index].x; - const unsigned tile_y = rast->tasks[thread_index].y; + const unsigned tile_x = task->x; + const unsigned tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -404,8 +406,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; @@ -520,8 +523,9 @@ outline_subtiles(uint8_t *tile) static void lp_rast_store_color( struct lp_rasterizer *rast, unsigned thread_index) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; unsigned i; for (i = 0; i < rast->state.fb.nr_cbufs; i++) { @@ -552,12 +556,12 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, thread_index, x, y, w, h); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(rast->tasks[thread_index].tile.color[i]); + outline_subtiles(task->tile.color[i]); else if (LP_DEBUG & DEBUG_SHOW_TILES) - 
outline_tile(rast->tasks[thread_index].tile.color[i]); + outline_tile(task->tile.color[i]); lp_tile_write_4ub(transfer->texture->format, - rast->tasks[thread_index].tile.color[i], + task->tile.color[i], rast->cbuf_map[i], transfer->stride, x, y, @@ -587,8 +591,9 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, static void lp_rast_store_zstencil( struct lp_rasterizer *rast, unsigned thread_index ) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -601,7 +606,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(rast->tasks[thread_index].tile.depth, + lp_tile_write_z32(task->tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -991,12 +996,14 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { + struct lp_rasterizer_task *task = &rast->tasks[i]; + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); - rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tasks[i].rast = rast; - rast->tasks[i].thread_index = i; + task->tile.depth = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); + task->rast = rast; + task->thread_index = i; } create_rast_threads(rast); -- cgit v1.2.3 From 4d2dc9da82fcb0464b88c273a606f16d0183a758 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 11:32:48 -0700 Subject: llvmpipe: updated comment --- 
src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ab545ed3de..0053c1b88b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,6 +31,8 @@ * Code generate the whole fragment pipeline. * * The fragment pipeline consists of the following stages: + * - triangle edge in/out testing + * - scissor test * - stipple (TBI) * - early depth test * - fragment shader -- cgit v1.2.3 From 9b534400d9969eceac46b28145405086dda8c113 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:19:00 -0700 Subject: llvmpipe: add makefile rule for generating .s files --- src/gallium/drivers/llvmpipe/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 71e7c2b5d9..666aa7293e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -68,3 +68,8 @@ include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ + + +# to make a .s file to inspect assembly code +.c.s: + $(CC) -S $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -- cgit v1.2.3 From 58903b378188861a6f7a67bbfb07424b73df2a1b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:23:11 -0700 Subject: llvmpipe: put TILE_SIZE #define in its own header To avoid including lp_tile_soa.h in other places. 
--- src/gallium/drivers/llvmpipe/lp_tile_size.h | 39 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_soa.h | 15 +++-------- 2 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_tile_size.h diff --git a/src/gallium/drivers/llvmpipe/lp_tile_size.h b/src/gallium/drivers/llvmpipe/lp_tile_size.h new file mode 100644 index 0000000000..f0b983c063 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_size.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_TILE_SIZE_H +#define LP_TILE_SIZE_H + + +/** + * Tile size (width and height). This needs to be a power of two. 
+ */ +#define TILE_ORDER 6 +#define TILE_SIZE (1 << TILE_ORDER) + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 1b7be3cce0..eea3ab8499 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" #include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ - +#include "lp_tile_size.h" #ifdef __cplusplus extern "C" { @@ -40,22 +40,15 @@ extern "C" { struct pipe_transfer; -/** - * Cache tile size (width and height). This needs to be a power of two. - */ -#define TILE_ORDER 6 -#define TILE_SIZE (1 << TILE_ORDER) - - #define TILE_VECTOR_HEIGHT 4 #define TILE_VECTOR_WIDTH 4 extern const unsigned char tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; -#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) -#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) -#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) +#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) //16 +#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64 +#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 #define TILE_PIXEL(_p, _x, _y, _c) \ ((_p)[((_y) / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE + \ -- cgit v1.2.3 From 0706dae088e5b46c4cad1a5ee41038e05c7f363b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:44:12 -0700 Subject: llvmpipe: align display target size to multiple of tile size This will allow us to skip clipping tiles to surface bounds. 
--- src/gallium/drivers/llvmpipe/lp_texture.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 1c92d7f722..36e2ebb41a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,9 +40,10 @@ #include "util/u_memory.h" #include "lp_context.h" +#include "lp_screen.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_screen.h" +#include "lp_tile_size.h" #include "lp_winsys.h" @@ -67,8 +68,8 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. */ - nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); - nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); + nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); + nblocksy = util_format_get_nblocksy(pt->format, align(height, TILE_SIZE)); lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); @@ -96,10 +97,15 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, { struct llvmpipe_winsys *winsys = screen->winsys; + /* Round up the surface size to a multiple of the tile size to + * avoid tile clipping. 
+ */ + unsigned width = align(lpt->base.width0, TILE_SIZE); + unsigned height = align(lpt->base.height0, TILE_SIZE); + lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width0, - lpt->base.height0, + width, height, 16, &lpt->stride[0] ); @@ -299,8 +305,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, pipe_texture_reference(&pt->texture, texture); pt->x = x; pt->y = y; - pt->width = w; - pt->height = h; + pt->width = align(w, TILE_SIZE); + pt->height = align(h, TILE_SIZE); pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; -- cgit v1.2.3 From 7319ae0954980196822a09d914e8b7d9cad07d16 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:47:22 -0700 Subject: llvmpipe: remove tile clipping code The surface is always a multiple of the tile size now. --- src/gallium/drivers/llvmpipe/lp_rast.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 05901d07aa..e27b6528ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -261,13 +261,6 @@ void lp_rast_load_color( struct lp_rasterizer *rast, if (y >= transfer->height) continue; - /* XXX: require tile-size aligned render target dimensions: - */ - if (x + w > transfer->width) - w -= x + w - transfer->width; - - if (y + h > transfer->height) - h -= y + h - transfer->height; assert(w >= 0); assert(h >= 0); @@ -539,19 +532,6 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, if (y >= transfer->height) continue; - /* XXX: require tile-size aligned render target dimensions: - */ - if (x + w > transfer->width) - w -= x + w - transfer->width; - - if (y + h > transfer->height) - h -= y + h - transfer->height; - - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, thread_index, x, y, w, h); -- cgit v1.2.3 From 
63f249bf909cab60635c2df9122db86eaab6c421 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:48:09 -0700 Subject: llvmpipe: optimize tile writing code The code which converts/copies color tiles to the linear layout has been rewritten. There's less arithmetic and better loop unrolling, and possibly a better memory access pattern. Some demos, like gears, are about 20% faster now. --- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 127 ++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index a603b7f9f4..5d53689a3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -129,22 +129,8 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def generate_format_write(format, src_type, src_native_type, src_suffix): - '''Generate the function to write pixels to a particular format''' - - name = short_name(format) - - dst_native_type = native_type(format) - - print 'static void' - print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) - print '{' - print ' unsigned x, y;' - print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < h; ++y) {' - print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' - +def compute_inverse_swizzle(format): + '''Return an array[4] of inverse swizzle terms''' inv_swizzle = [None]*4 if format.colorspace == 'rgb': for i in range(4): @@ -155,8 +141,86 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): swizzle = format.out_swizzle[0] if swizzle < 4: inv_swizzle[swizzle] = 0 - else: - assert False + return inv_swizzle + + +def pack_rgba(format, src_type, r, g, b, a): + """Return an expression 
for packing r, g, b, a into a pixel of the + given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' + """ + assert format.colorspace == 'rgb' + inv_swizzle = compute_inverse_swizzle(format) + shift = 0 + expr = None + for i in range(4): + # choose r, g, b, or a depending on the inverse swizzle term + if inv_swizzle[i] == 0: + value = r + elif inv_swizzle[i] == 1: + value = g + elif inv_swizzle[i] == 2: + value = b + elif inv_swizzle[i] == 3: + value = a + else: + value = None + + if value: + dst_type = format.in_types[i] + dst_native_type = native_type(format) + value = conversion_expr(src_type, dst_type, dst_native_type, value) + term = "((%s) << %d)" % (value, shift) + if expr: + expr = expr + " | " + term + else: + expr = term + + width = format.in_types[i].size + shift = shift + width + return expr + + +def emit_unrolled_write_code(format, src_type): + '''Emit code for writing a block based on unrolled loops. + This is considerably faster than the TILE_PIXEL-based code below. 
+ ''' + dst_native_type = native_type(format) + print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() + print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) + print ' unsigned int qx, qy, i;' + print + print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {' + print ' const unsigned py = y0 + qy;' + print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {' + print ' const unsigned px = x0 + qx;' + print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' + print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' + print ' const uint8_t *b = src + 2 * TILE_C_STRIDE;' + print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' + print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' + print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' + print ' dstpix[offset + 0] = pixel0;' + print ' dstpix[offset + 1] = pixel1;' + print ' }' + print ' src += TILE_X_STRIDE;' + print ' }' + print ' }' + + +def emit_tile_pixel_write_code(format, src_type): + '''Emit code for writing a block based on the TILE_PIXEL macro.''' + dst_native_type = native_type(format) + + inv_swizzle = compute_inverse_swizzle(format) + + print ' unsigned x, y;' + print ' uint8_t *dst_row = dst + y0*dst_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' if format.layout == ARITH: print ' %s pixel = 0;' % dst_native_type @@ -185,6 +249,20 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): print ' }' print ' dst_row += dst_stride;' print ' }' + + +def generate_format_write(format, src_type, 
src_native_type, src_suffix): + '''Generate the function to write pixels to a particular format''' + + name = short_name(format) + + print 'static void' + print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print '{' + if format.layout == ARITH and format.colorspace == 'rgb': + emit_unrolled_write_code(format, src_type) + else: + emit_tile_pixel_write_code(format, src_type) print '}' print @@ -265,6 +343,19 @@ def main(): print ' { 10, 11, 14, 15}' print '};' print + print '/* Note: these lookup tables could be replaced with some' + print ' * bit-twiddling code, but this is a little faster.' + print ' */' + print 'static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 1, 0, 1, 2, 3, 2, 3,' + print ' 0, 1, 0, 1, 2, 3, 2, 3' + print '};' + print + print 'static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 0, 1, 1, 0, 0, 1, 1,' + print ' 2, 2, 3, 3, 2, 2, 3, 3' + print '};' + print generate_clamp() -- cgit v1.2.3 From cd9d9e2436a0815f6ed3a61d2cdf8fad53278506 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 14:59:01 -0700 Subject: llvmpipe: added simple perf/statistics counting facility Currently counting number of tris, how many tiles of each size are fully covered, partially covered or empty, etc. Set LP_DEBUG=counters to enable. Results are printed upon context destruction. 
--- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_context.c | 5 ++ src/gallium/drivers/llvmpipe/lp_debug.h | 1 + src/gallium/drivers/llvmpipe/lp_perf.c | 86 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_perf.h | 74 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 6 ++ src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup.h | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 16 ++++-- 11 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_perf.c create mode 100644 src/gallium/drivers/llvmpipe/lp_perf.h diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 666aa7293e..899af6acf8 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -36,6 +36,7 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ + lp_perf.c \ lp_query.c \ lp_rast.c \ lp_rast_tri.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index c4e7a4a22f..d7a396292c 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -52,6 +52,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', + 'lp_perf.c', 'lp_query.c', 'lp_rast.c', 'lp_rast_tri.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c5b00f8e23..51de6f93ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -38,6 +38,7 @@ #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_perf.h" #include "lp_state.h" #include "lp_surface.h" #include "lp_texture.h" @@ -54,6 +55,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) struct llvmpipe_context *llvmpipe = 
llvmpipe_context( pipe ); uint i; + lp_print_counters(); + /* This will also destroy llvmpipe->setup: */ if (llvmpipe->draw) @@ -195,6 +198,8 @@ llvmpipe_create( struct pipe_screen *screen ) lp_init_surface_functions(llvmpipe); + lp_reset_counters(); + return &llvmpipe->pipe; fail: diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 7128e8eb4b..7e04bd471e 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -47,6 +47,7 @@ st_print_current(void); #define DEBUG_JIT 0x100 #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 +#define DEBUG_COUNTERS 0x800 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c new file mode 100644 index 0000000000..2628d51069 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_debug.h" +#include "lp_debug.h" +#include "lp_perf.h" + + + +struct lp_counters lp_count; + + +void +lp_reset_counters(void) +{ + memset(&lp_count, 0, sizeof(lp_count)); +} + + +void +lp_print_counters(void) +{ + if (LP_DEBUG & DEBUG_COUNTERS) { + unsigned total_64, total_16, total_4; + float p1, p2, p3; + + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + + total_64 = (lp_count.nr_empty_64 + + lp_count.nr_fully_covered_64 + + lp_count.nr_partially_covered_64); + + p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; + p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; + p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + + debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + + total_16 = (lp_count.nr_empty_16 + + lp_count.nr_fully_covered_16 + + lp_count.nr_partially_covered_16); + + p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16; + p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; + p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; + + debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of 
%u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + + total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); + + p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + + debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h new file mode 100644 index 0000000000..9886088c38 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Performance / statistic counters, etc. + */ + + +#ifndef LP_PERF_H +#define LP_PERF_H + + +/** + * Various counters + */ +struct lp_counters +{ + unsigned nr_tris; + unsigned nr_culled_tris; + unsigned nr_empty_64; + unsigned nr_fully_covered_64; + unsigned nr_partially_covered_64; + unsigned nr_empty_16; + unsigned nr_fully_covered_16; + unsigned nr_partially_covered_16; + unsigned nr_empty_4; + unsigned nr_non_empty_4; +}; + + +extern struct lp_counters lp_count; + + +/** Increment the named counter (only for debug builds) */ +#ifdef DEBUG +#define LP_COUNT(counter) lp_count.counter++ +#else +#define LP_COUNT(counter) +#endif + + +extern void +lp_reset_counters(void); + + +extern void +lp_print_counters(void); + + +#endif /* LP_PERF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index b3d1e7dee4..e9d15727a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -32,6 +32,7 @@ #include #include "util/u_math.h" #include "lp_debug.h" +#include "lp_perf.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -167,6 +168,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_4); } else { int px = x + pos_table4[i][0]; @@ -174,6 +176,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, /* Don't bother testing if the 4x4 block is entirely in/out of * the triangle. It's a little faster to do it in the jit code. 
*/ + LP_COUNT(nr_non_empty_4); do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); } } @@ -223,6 +226,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_16); } else { int px = x + pos_table16[i][0]; @@ -232,10 +236,12 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ + LP_COUNT(nr_fully_covered_16); block_full_16(rast_task, tri, px, py); } else { /* the block is partially in/out of the triangle */ + LP_COUNT(nr_partially_covered_16); do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); } } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 72f2e8ebf8..9dd4ea7ef6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -54,6 +54,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "jit", DEBUG_JIT }, { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, + { "counters", DEBUG_COUNTERS }, {NULL, 0} }; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d4a4724ad1..f8fc912fa1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -499,7 +499,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, * Note: we have to check all scenes including any scenes currently * being rendered and the current scene being built. 
*/ -boolean +unsigned lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5081da29d1..0e155a7dc3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -123,7 +123,7 @@ void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); -boolean +unsigned lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0d89bef606..76ecab7644 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,10 +29,11 @@ * Binning code for triangles */ -#include "lp_setup_context.h" -#include "lp_rast.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" #define NUM_CHANNELS 4 @@ -278,12 +279,15 @@ do_triangle_ccw(struct setup_context *setup, area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); + LP_COUNT(nr_tris); + /* Cull non-ccw and zero-sized triangles. * * XXX: subject to overflow?? 
*/ if (area <= 0.0f) { lp_scene_putback_data( scene, sizeof *tri ); + LP_COUNT(nr_culled_tris); return; } @@ -303,6 +307,7 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); + LP_COUNT(nr_culled_tris); return; } @@ -459,6 +464,7 @@ do_triangle_ccw(struct setup_context *setup, cx3 + eo3 < 0) { /* do nothing */ + LP_COUNT(nr_empty_64); if (in) break; /* exiting triangle, all done with this row */ } @@ -466,8 +472,9 @@ do_triangle_ccw(struct setup_context *setup, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = TRUE; /* triangle covers the whole tile- shade whole tile */ + LP_COUNT(nr_fully_covered_64); + in = TRUE; if(setup->fs.current.opaque) { lp_scene_bin_reset( scene, x, y ); lp_scene_bin_command( scene, x, y, @@ -480,8 +487,9 @@ do_triangle_ccw(struct setup_context *setup, } else { + /* rasterizer/shade partial tile */ + LP_COUNT(nr_partially_covered_64); in = TRUE; - /* shade partial tile */ lp_scene_bin_command( scene, x, y, lp_rast_triangle, lp_rast_arg_triangle(tri) ); -- cgit v1.2.3 From e5829ccc2b0cb1eed27c89763e8e4c6775dd6d4c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 15:41:03 -0700 Subject: progs/demos: call glutDestroyWindow() upon exit --- progs/demos/engine.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/progs/demos/engine.c b/progs/demos/engine.c index 7e485111da..a4148357d4 100644 --- a/progs/demos/engine.c +++ b/progs/demos/engine.c @@ -26,6 +26,8 @@ /* Target engine speed: */ const int RPM = 100.0; +static int Win = 0; + /** * Engine description. 
@@ -1154,6 +1156,7 @@ OptRotate(void) static void OptExit(void) { + glutDestroyWindow(Win); exit(0); } @@ -1323,7 +1326,7 @@ main(int argc, char *argv[]) glutInitWindowSize(WinWidth, WinHeight); glutInit(&argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); - glutCreateWindow("OpenGL Engine Demo"); + Win = glutCreateWindow("OpenGL Engine Demo"); glewInit(); glutReshapeFunc(Reshape); glutMouseFunc(Mouse); -- cgit v1.2.3 From a904a7b99043c19493db5c0945b046795a5932b1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 16:21:31 -0700 Subject: llvmpipe: manually unroll the inputs.step[] setup code Good for a few more fps in some tests. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 49 ++++++++++++++++++----------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 76ecab7644..dcd849bc85 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -362,31 +362,44 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. 
+ */ { const int xstep1 = -tri->dy12; const int xstep2 = -tri->dy23; const int xstep3 = -tri->dy31; - const int ystep1 = tri->dx12; const int ystep2 = tri->dx23; const int ystep3 = tri->dx31; - - int qx, qy, ix, iy; - int i = 0; - - for (qy = 0; qy < 2; qy++) { - for (qx = 0; qx < 2; qx++) { - for (iy = 0; iy < 2; iy++) { - for (ix = 0; ix < 2; ix++, i++) { - int x = qx * 2 + ix; - int y = qy * 2 + iy; - tri->inputs.step[0][i] = x * xstep1 + y * ystep1; - tri->inputs.step[1][i] = x * xstep2 + y * ystep2; - tri->inputs.step[2][i] = x * xstep3 + y * ystep3; - } - } - } - } + +#define SETUP_STEP(i, x, y) \ + do { \ + tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \ + tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \ + tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \ + } while (0) + + SETUP_STEP(0, 0, 0); + SETUP_STEP(1, 1, 0); + SETUP_STEP(2, 0, 1); + SETUP_STEP(3, 1, 1); + + SETUP_STEP(4, 2, 0); + SETUP_STEP(5, 3, 0); + SETUP_STEP(6, 2, 1); + SETUP_STEP(7, 3, 1); + + SETUP_STEP(8, 0, 2); + SETUP_STEP(9, 1, 2); + SETUP_STEP(10, 0, 3); + SETUP_STEP(11, 1, 3); + + SETUP_STEP(12, 2, 2); + SETUP_STEP(13, 3, 2); + SETUP_STEP(14, 2, 3); + SETUP_STEP(15, 3, 3); +#undef STEP } /* -- cgit v1.2.3 From ff9b55da9a6e3b5aa2d42eac7d79c675a679af57 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 17:31:43 -0700 Subject: llvmpipe: area is an int here, not float --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index dcd849bc85..b637c35735 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -265,7 +265,8 @@ do_triangle_ccw(struct setup_context *setup, struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); - float area, oneoverarea; + int area; + float 
oneoverarea; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -276,8 +277,7 @@ do_triangle_ccw(struct setup_context *setup, tri->dy23 = y2 - y3; tri->dy31 = y3 - y1; - area = (tri->dx12 * tri->dy31 - - tri->dx31 * tri->dy12); + area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); LP_COUNT(nr_tris); @@ -285,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * * XXX: subject to overflow?? */ - if (area <= 0.0f) { + if (area <= 0) { lp_scene_putback_data( scene, sizeof *tri ); LP_COUNT(nr_culled_tris); return; -- cgit v1.2.3 From 798a9d3f942df1953a538073c85d6a6fed3775db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 18:25:31 -0700 Subject: llvmpipe: re-use a1 var in linear_coef() --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b637c35735..15534756c4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -89,7 +89,7 @@ static void linear_coef( struct lp_rast_triangle *tri, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + tri->inputs.a0[slot][i] = (a1 - (dadx * (v1[0][0] - 0.5f) + dady * (v1[0][1] - 0.5f))); } -- cgit v1.2.3 From e24ea786faad502da63cc4d59b0c30e3f1915c45 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:04:53 -0700 Subject: llvmpipe: consolidate lp_scene_alloc_aligned() calls Use just one call instead of four. Good for a few more fps. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 54 ++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 15534756c4..e5e64c3e5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -181,18 +181,8 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned slot; - /* Allocate space for the a0, dadx and dady arrays - */ - { - unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); - } - /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); @@ -243,6 +233,41 @@ static inline int subpixel_snap( float a ) } + +/** + * Alloc space for a new triangle plus the input.a0/dadx/dady arrays + * immediately after it. + * The memory is allocated from the per-scene pool, not per-tile. 
+ * \param tri_size returns number of bytes allocated + * \param nr_inputs number of fragment shader inputs + * \return pointer to triangle space + */ +static INLINE struct lp_rast_triangle * +alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +{ + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + struct lp_rast_triangle *tri; + unsigned bytes; + char *inputs; + + assert(sizeof(*tri) % 16 == 0); + + bytes = sizeof(*tri) + (3 * input_array_sz); + + tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + + inputs = (char *) (tri + 1); + tri->inputs.a0 = (float (*)[4]) inputs; + tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); + tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + + *tri_size = bytes; + + return tri; +} + + + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's @@ -264,10 +289,13 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); + struct lp_rast_triangle *tri; int area; float oneoverarea; int minx, maxx, miny, maxy; + unsigned tri_bytes; + + tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; @@ -286,7 +314,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } @@ -306,7 +334,7 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } -- cgit v1.2.3 From 1d23954a0848f8dd87b214f3a7ec3ae3c04ab0c1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:05:54 -0700 Subject: llvmpipe: s/inline/INLINE/ --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e5e64c3e5c..9e59a6602c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -227,7 +227,7 @@ static void setup_tri_coefficients( struct setup_context *setup, -static inline int subpixel_snap( float a ) +static INLINE int subpixel_snap( float a ) { return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); } -- cgit v1.2.3 From 3bca8691b51a1ca91572c62139f28b64c558ada2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:09:59 -0700 Subject: llvmpipe: use some local vars to index step arrays Saves a few more cycles. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index e9d15727a7..3f76f159df 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -153,16 +153,18 @@ do_block_16( struct lp_rasterizer_task *rast_task, const int eo1 = tri->eo1 * 4; const int eo2 = tri->eo2 * 4; const int eo3 = tri->eo3 * 4; - + const int *step0 = tri->inputs.step[0]; + const int *step1 = tri->inputs.step[1]; + const int *step2 = tri->inputs.step[2]; int i; assert(x % 16 == 0); assert(y % 16 == 0); for (i = 0; i < 16; i++) { - int cx1 = c1 + (tri->inputs.step[0][i] * 4); - int cx2 = c2 + (tri->inputs.step[1][i] * 4); - int cx3 = c3 + (tri->inputs.step[2][i] * 4); + int cx1 = c1 + step0[i] * 4; + int cx2 = c2 + step1[i] * 4; + int cx3 = c3 + step2[i] * 4; if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From 99f1e32fadbf16c167350af3304b2d68c464452a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:46:23 -0700 Subject: gallium/util: print dlerror() info upon dlopen() failure --- src/gallium/auxiliary/util/u_dl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index b42b429d4d..d8803f77fa 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -26,8 +26,8 @@ * **************************************************************************/ - #include "pipe/p_config.h" +#include "util/u_debug.h" #if defined(PIPE_OS_UNIX) #include @@ -43,7 +43,12 @@ struct util_dl_library * util_dl_open(const char *filename) { #if defined(PIPE_OS_UNIX) - return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); + struct util_dl_library *lib; + lib = (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); + if (!lib) { + 
debug_printf("gallium: dlopen() failed: %s\n", dlerror()); + } + return lib; #elif defined(PIPE_OS_WINDOWS) return (struct util_dl_library *)LoadLibraryA(filename); #else -- cgit v1.2.3 From 5460da543608805a3debbb401ccc19442e1cb476 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:46:43 -0700 Subject: gallium/util: comments for time-related functions --- src/gallium/auxiliary/util/u_time.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index a6189a247b..29fd1cbc67 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -74,14 +74,23 @@ struct util_time void util_time_get(struct util_time *t); +/** + * Return t2 = t1 + usecs + */ void util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2); +/** + * Return current time in microseconds + */ uint64_t util_time_micros( void ); +/** + * Return difference between times, in microseconds + */ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2); -- cgit v1.2.3 From e95ad2a2b521514eaec04f9b266ee030ecc639a3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:49:43 -0700 Subject: llvmpipe: count/report time spent in LLVM compilations --- src/gallium/drivers/llvmpipe/lp_perf.c | 4 ++++ src/gallium/drivers/llvmpipe/lp_perf.h | 4 ++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index 2628d51069..042218b27f 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -82,5 +82,9 @@ lp_print_counters(void) debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + + 
debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); } } diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index 9886088c38..d982bcc989 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -49,6 +49,8 @@ struct lp_counters unsigned nr_partially_covered_16; unsigned nr_empty_4; unsigned nr_non_empty_4; + unsigned nr_llvm_compiles; + int64_t llvm_compile_time; /**< total, in microseconds */ }; @@ -58,8 +60,10 @@ extern struct lp_counters lp_count; /** Increment the named counter (only for debug builds) */ #ifdef DEBUG #define LP_COUNT(counter) lp_count.counter++ +#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) #else #define LP_COUNT(counter) +#define LP_COUNT_ADD(counter, incr) (void) incr #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0053c1b88b..a7514ee011 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -65,6 +65,7 @@ #include "util/u_memory.h" #include "util/u_format.h" #include "util/u_debug_dump.h" +#include "util/u_time.h" #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -84,13 +85,14 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" -#include "lp_screen.h" -#include "lp_context.h" #include "lp_buffer.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_perf.h" +#include "lp_screen.h" #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" -#include "lp_debug.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -1108,9 +1110,19 @@ 
llvmpipe_update_fs(struct llvmpipe_context *lp) variant = variant->next; } - if(!variant) + if (!variant) { + struct util_time t0, t1; + int64_t dt; + util_time_get(&t0); + variant = generate_variant(lp, shader, &key); + util_time_get(&t1); + dt = util_time_diff(&t0, &t1); + LP_COUNT_ADD(llvm_compile_time, dt); + LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ + } + shader->current = variant; /* TODO: put this in the variant */ -- cgit v1.2.3 From 36a0819ff4ede1af91dcf909106cf20659856384 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 17:16:42 -0700 Subject: llvmpipe: added debug option to disable LLVM optimization passes --- src/gallium/drivers/llvmpipe/lp_debug.h | 1 + src/gallium/drivers/llvmpipe/lp_jit.c | 30 +++++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_screen.c | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 7e04bd471e..ee81814361 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -48,6 +48,7 @@ st_print_current(void); #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 +#define DEBUG_NO_LLVM_OPT 0x1000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 429cb973c2..9fad7033db 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -37,6 +37,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" +#include "lp_debug.h" #include "lp_screen.h" #include "lp_bld_intr.h" #include "lp_bld_misc.h" @@ -165,20 +166,23 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - /* TODO: Add more passes */ - LLVMAddConstantPropagationPass(screen->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(screen->pass); + + if ((LP_DEBUG & DEBUG_NO_LLVM_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } + LLVMAddPromoteMemoryToRegisterPass(screen->pass); + LLVMAddGVNPass(screen->pass); + LLVMAddCFGSimplificationPass(screen->pass); } - LLVMAddPromoteMemoryToRegisterPass(screen->pass); - LLVMAddGVNPass(screen->pass); - LLVMAddCFGSimplificationPass(screen->pass); lp_jit_init_globals(screen); } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9dd4ea7ef6..a3adc81e9f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -55,6 +55,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, { "counters", DEBUG_COUNTERS }, + { "nopt", DEBUG_NO_LLVM_OPT }, {NULL, 0} }; #endif -- cgit v1.2.3