From e3a6e60040b7f6ea7965e52f8f9881ed31e0347c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2007 16:15:49 -0800 Subject: [965] Convert the driver to dri_bufmgr interface and enable TTM. This is currently believed to work but be a significant performance loss. Performance recovery should be soon to follow. The dri_bo_fake_disable_backing_store() call was added to allow backing store disable like bufmgr_fake.c did, which is a significant performance win (though it's missing the no-fence-subdata part). This commit is a squash merge of the 965-ttm branch, which had some history I wanted to avoid pulling due to noisiness and brokenness at many points for git-bisecting. --- src/mesa/drivers/dri/i965/brw_misc_state.c | 143 ++++++++++++----------------- 1 file changed, 60 insertions(+), 83 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_misc_state.c') diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index d5779680ff..210745c63b 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -123,21 +123,18 @@ const struct brw_tracked_state brw_drawing_rect = { .update = upload_drawing_rect }; -/*********************************************************************** - * Binding table pointers +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. */ - static void upload_binding_table_pointers(struct brw_context *brw) { struct brw_binding_table_pointers btp; memset(&btp, 0, sizeof(btp)); - /* The binding table has been emitted to the SS pool already, so we - * know what its offset is. When the batch buffer is fired, the - * binding table and surface structs will get fixed up to point to - * where the textures actually landed, but that won't change the - * value of the offsets here: - */ btp.header.opcode = CMD_BINDING_TABLE_PTRS; btp.header.length = sizeof(btp)/4 - 2; btp.vs = 0; @@ -159,11 +156,12 @@ const struct brw_tracked_state brw_binding_table_pointers = { }; -/*********************************************************************** - * Pipelined state pointers. This is the key state packet from which - * the hardware chases pointers to all the uploaded state in VRAM. +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. */ - static void upload_pipelined_state_pointers(struct brw_context *brw ) { struct brw_pipelined_state_pointers psp; @@ -233,71 +231,53 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .update = upload_psp_urb_cbs }; - - - -/*********************************************************************** - * Depthbuffer - currently constant, but rotation would change that. +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. */ - static void upload_depthbuffer(struct brw_context *brw) { - /* 0x79050003 Depth Buffer */ struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - struct brw_depthbuffer bd; - memset(&bd, 0, sizeof(bd)); - bd.header.bits.opcode = CMD_DEPTH_BUFFER; - bd.header.bits.length = sizeof(bd)/4-2; - bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1; - + unsigned int format; + switch (region->cpp) { case 2: - bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM; + format = BRW_DEPTHFORMAT_D16_UNORM; break; case 4: if (intel->depth_buffer_is_float) - bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT; + format = BRW_DEPTHFORMAT_D32_FLOAT; else - bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; break; default: assert(0); return; } - bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */ - - /* The depthbuffer can only use YMAJOR tiling... This is a bit of - * a shame as it clashes with the 2d blitter which only supports - * XMAJOR tiling... - */ - bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR; - bd.dword1.bits.tiled_surface = intel->depth_region->tiled; - bd.dword1.bits.surface_type = BRW_SURFACE_2D; - - /* BRW_NEW_LOCK */ - bd.dword2_base_addr = bmBufferOffset(intel, region->buffer); - - bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - bd.dword3.bits.lod = 0; - bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */ - bd.dword3.bits.height = region->height - 1; - - bd.dword4.bits.min_array_element = 0; - bd.dword4.bits.depth = 0; - - BRW_CACHED_BATCH_STRUCT(brw, &bd); + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + OUT_BATCH(((region->pitch * region->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + (region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((region->pitch - 1) << 6) | + ((region->height - 1) << 19)); + OUT_BATCH(0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 - }, - .update = upload_depthbuffer + .update = upload_depthbuffer, + .always_update = GL_TRUE, }; @@ -494,40 +474,37 @@ const struct brw_tracked_state brw_invarient_state = { .update = upload_invarient_state }; - -/* State pool addresses: +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - struct brw_state_base_address sba; - - memset(&sba, 0, sizeof(sba)); - - sba.header.opcode = CMD_STATE_BASE_ADDRESS; - sba.header.length = 0x4; - - /* BRW_NEW_LOCK */ - sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5; - sba.bits0.modify_enable = 1; - - /* BRW_NEW_LOCK */ - sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5; - sba.bits1.modify_enable = 1; - sba.bits2.modify_enable = 1; - sba.bits3.modify_enable = 1; - sba.bits4.modify_enable = 1; - - BRW_CACHED_BATCH_STRUCT(brw, &sba); + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 1); /* General state base address */ + OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); } const struct brw_tracked_state brw_state_base_address = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 - }, + .always_update = GL_TRUE, .update = upload_state_base_address }; -- cgit v1.2.3