From 70dd4379d2cd54f229c3940312537912470218d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 10:34:13 -0600 Subject: cell: implement fencing for texture buffers If we delete a texture, we need to keep the underlying tiled data buffer around until any rendering that references it has completed. Keep a list of buffers referenced by a rendering batch. Unref/free them when the associated batch's fence is executed/signalled. --- src/gallium/drivers/cell/common.h | 25 ++++ src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_batch.c | 32 +++++ src/gallium/drivers/cell/ppu/cell_context.c | 6 + src/gallium/drivers/cell/ppu/cell_context.h | 21 ++++ src/gallium/drivers/cell/ppu/cell_fence.c | 158 +++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_fence.h | 57 +++++++++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 33 ++++-- src/gallium/drivers/cell/ppu/cell_texture.h | 5 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 6 + src/gallium/drivers/cell/spu/spu_command.c | 38 +++++- src/gallium/drivers/cell/spu/spu_main.h | 2 +- 13 files changed, 367 insertions(+), 19 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.c create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.h diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9ca4e9d67e..23fb0b0831 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -102,6 +102,8 @@ #define CELL_CMD_STATE_RASTERIZER 22 #define CELL_CMD_VS_EXECUTE 23 #define CELL_CMD_FLUSH_BUFFER_RANGE 24 +#define CELL_CMD_FENCE 25 + /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -123,6 +125,29 @@ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 + +#define CELL_FENCE_IDLE 0 +#define CELL_FENCE_EMITTED 1 +#define CELL_FENCE_SIGNALLED 2 + +struct cell_fence +{ + /** There's a 16-byte status qword per SPU */ + volatile uint status[CELL_MAX_SPUS][4]; +}; + + +/** + * Fence command sent to SPUs. In response, the SPUs will write + * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ + uint64_t opcode; /**< CELL_CMD_FENCE */ + struct cell_fence *fence; +}; + + /** * Command to specify per-fragment operations state and generated code. * Note that the dsa, blend, blend_color fields are really only needed diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index b28f4c5c31..9358a47284 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -24,6 +24,7 @@ SOURCES = \ cell_clear.c \ cell_context.c \ cell_draw_arrays.c \ + cell_fence.c \ cell_flush.c \ cell_gen_fragment.c \ cell_gen_fp.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index 01254aed60..448b723d85 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -28,6 +28,7 @@ #include "cell_context.h" #include "cell_batch.h" +#include "cell_fence.h" #include "cell_spu.h" @@ -63,6 +64,10 @@ cell_get_empty_buffer(struct cell_context *cell) printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); */ prev_buffer = buf; + + /* release tex buffer associated w/ prev use of this batch buf */ + cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + return buf; } } @@ -84,6 +89,26 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). + */ +static void +emit_fence(struct cell_context *cell) +{ + const uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + struct cell_command_fence *fence_cmd; + + ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + + fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); + fence_cmd->opcode = CELL_CMD_FENCE; + fence_cmd->fence = &cell->fenced_buffers[batch].fence; +} + + /** * Flush the current batch buffer to the SPUs. * An empty buffer will be found and set as the new current batch buffer @@ -102,6 +127,12 @@ cell_batch_flush(struct cell_context *cell) if (size == 0) return; + /* Before we use this batch buffer, make sure any fenced texture buffers + * are released. + */ + if (cell->fenced_buffers[batch].head) + emit_fence(cell); + flushing = TRUE; assert(batch < CELL_NUM_BUFFERS); @@ -142,6 +173,7 @@ uint cell_batch_free_space(const struct cell_context *cell) { uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + free -= sizeof(struct cell_command_fence); return free; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 7a2d93ecb4..22d552d8e3 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -47,6 +47,7 @@ #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_state.h" #include "cell_surface.h" @@ -104,6 +105,7 @@ cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; + uint i; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -151,6 +153,10 @@ cell_create_context(struct pipe_screen *screen, cell_debug_flags, 0 ); + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + /* * SPU stuff */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index ad1f4829a4..4491ae8cdf 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -81,6 +81,19 @@ struct cell_fragment_ops_key }; +struct cell_buffer_node; + +/** + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. + */ +struct cell_buffer_list +{ + struct cell_fence fence; + struct cell_buffer_node *head; +}; + + /** * Per-context state, subclass of pipe_context. */ @@ -154,6 +167,14 @@ struct cell_context uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceeding + * that fence can be unreferenced (and probably freed). + */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..ffb3bea12b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,158 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + //ASSERT(fence->status[i][0] != CELL_FENCE_IDLE); + if (fence->status[i][0] == CELL_FENCE_EMITTED) + return FALSE; + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. + */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are current bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. + */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index effcd2a1e1..dd2d7f7d1e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -225,7 +225,7 @@ cell_emit_state(struct cell_context *cell) if (cell->texture[i]) { uint level; for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = cell->texture[i]->tiled_data[level]; + texture->start[level] = cell->texture[i]->tiled_mapped[level]; texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; texture->depth[level] = cell->texture[i]->base.depth[level]; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 9c6741f1bc..9ac2f3bbb9 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -136,6 +136,9 @@ cell_texture_release(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { + /* Delete this texture now. + * But note that the underlying pipe_buffer may linger... + */ struct cell_texture *ct = cell_texture(*pt); uint i; @@ -146,14 +149,12 @@ cell_texture_release(struct pipe_screen *screen, pipe_buffer_reference(screen, &ct->buffer, NULL); for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - if (ct->tiled_data[i]) { - /* XXX need to use a fenced buffer for tiled data so that - * it's properly freed after rendering has completed. - * Disabling this free() allows glDrawPixels to work for now. - */ -#if 0 - align_free(ct->tiled_data[i]); -#endif + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); } } @@ -234,12 +235,18 @@ cell_twiddle_texture(struct pipe_screen *screen, int offset = bufWidth * bufHeight * 4 * surface->face; uint *dst; - if (!ct->tiled_data[level]) { - ct->tiled_data[level] = - align_malloc(bufWidth * bufHeight * 4 * numFaces, 16); + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); + /* and map it */ + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); } - - dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset); + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, surface->stride, src); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index a0757091b0..2f5fe0dd1b 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -48,7 +48,10 @@ struct cell_texture struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/ + /** Texture data in tiled layout is held here */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..65ba51b6bb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -38,6 +38,7 @@ #include "cell_batch.h" #include "cell_context.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" @@ -108,6 +109,11 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, __FUNCTION__, cvbr->vertex_buf, vertices_used); */ + /* Make sure texture buffers aren't released until we're done rendering + * with them. + */ + cell_add_fenced_textures(cell); + /* Tell SPUs they can release the vert buf */ if (cvbr->vertex_buf != ~0U) { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 9c853c0961..a6ed29ea63 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -76,9 +76,10 @@ static void release_buffer(uint buffer) { /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); uint *dst = spu.init.buffer_status + index; @@ -93,6 +94,29 @@ release_buffer(uint buffer) } +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. + */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { @@ -637,6 +661,14 @@ cmd_batch(uint opcode) cmd_finish(); pos += 1; break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 8; + } + break; case CELL_CMD_RELEASE_VERTS: { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 95ef4c9244..668af10be2 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -210,7 +210,7 @@ extern struct spu_global spu; #define TAG_DCACHE1 21 #define TAG_DCACHE2 22 #define TAG_DCACHE3 23 - +#define TAG_FENCE 24 static INLINE void -- cgit v1.2.3