From e3a6e60040b7f6ea7965e52f8f9881ed31e0347c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 7 Dec 2007 16:15:49 -0800
Subject: [965] Convert the driver to dri_bufmgr interface and enable TTM.

This is currently believed to work but be a significant performance loss.
Performance recovery should be soon to follow.

The dri_bo_fake_disable_backing_store() call was added to allow backing store
disable like bufmgr_fake.c did, which is a significant performance win (though
it's missing the no-fence-subdata part).

This commit is a squash merge of the 965-ttm branch, which had some history
I wanted to avoid pulling due to noisiness and brokenness at many points
for git-bisecting.
---
 src/mesa/drivers/dri/i965/brw_misc_state.c | 143 ++++++++++++-----------------
 1 file changed, 60 insertions(+), 83 deletions(-)

(limited to 'src/mesa/drivers/dri/i965/brw_misc_state.c')

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index d5779680ff..210745c63b 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -123,21 +123,18 @@ const struct brw_tracked_state brw_drawing_rect = {
    .update = upload_drawing_rect
 };
 
-/***********************************************************************
- * Binding table pointers
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is the BRW_SS_POOL cache buffer.
  */
-
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
    struct brw_binding_table_pointers btp;
    memset(&btp, 0, sizeof(btp));
 
-   /* The binding table has been emitted to the SS pool already, so we
-    * know what its offset is.  When the batch buffer is fired, the
-    * binding table and surface structs will get fixed up to point to
-    * where the textures actually landed, but that won't change the
-    * value of the offsets here:
-    */
    btp.header.opcode = CMD_BINDING_TABLE_PTRS;
    btp.header.length = sizeof(btp)/4 - 2;
    btp.vs = 0;
@@ -159,11 +156,12 @@ const struct brw_tracked_state brw_binding_table_pointers = {
 };
 
 
-/***********************************************************************
- * Pipelined state pointers.  This is the key state packet from which
- * the hardware chases pointers to all the uploaded state in VRAM.
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
  */
-   
 static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
    struct brw_pipelined_state_pointers psp;
@@ -233,71 +231,53 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
    .update = upload_psp_urb_cbs
 };
 
-
-
-
-/***********************************************************************
- * Depthbuffer - currently constant, but rotation would change that.
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
  */
-
 static void upload_depthbuffer(struct brw_context *brw)
 {
-   /* 0x79050003  Depth Buffer */
    struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
-   struct brw_depthbuffer bd;
-   memset(&bd, 0, sizeof(bd));
 
-   bd.header.bits.opcode = CMD_DEPTH_BUFFER;
-   bd.header.bits.length = sizeof(bd)/4-2;
-   bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1;
-   
+   unsigned int format;
+
    switch (region->cpp) {
    case 2:
-      bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM;
+      format = BRW_DEPTHFORMAT_D16_UNORM;
       break;
    case 4:
       if (intel->depth_buffer_is_float)
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 format = BRW_DEPTHFORMAT_D32_FLOAT;
       else
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
       break;
    default:
       assert(0);
       return;
    }
 
-   bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */
-
-   /* The depthbuffer can only use YMAJOR tiling...  This is a bit of
-    * a shame as it clashes with the 2d blitter which only supports
-    * XMAJOR tiling...  
-    */
-   bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR;
-   bd.dword1.bits.tiled_surface = intel->depth_region->tiled;
-   bd.dword1.bits.surface_type = BRW_SURFACE_2D;
-
-   /* BRW_NEW_LOCK */
-   bd.dword2_base_addr = bmBufferOffset(intel, region->buffer);    
-
-   bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   bd.dword3.bits.lod = 0;
-   bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */
-   bd.dword3.bits.height = region->height - 1;
-
-   bd.dword4.bits.min_array_element = 0;
-   bd.dword4.bits.depth = 0;
-      
-   BRW_CACHED_BATCH_STRUCT(brw, &bd);
+   BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+   OUT_BATCH(((region->pitch * region->cpp) - 1) |
+	     (format << 18) |
+	     (BRW_TILEWALK_YMAJOR << 26) |
+	     (region->tiled << 27) |
+	     (BRW_SURFACE_2D << 29));
+   OUT_RELOC(region->buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+   OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+	     ((region->pitch - 1) << 6) |
+	     ((region->height - 1) << 19));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_depthbuffer = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
-   },
-   .update = upload_depthbuffer
+   .update = upload_depthbuffer,
+   .always_update = GL_TRUE,
 };
 
 
@@ -494,40 +474,37 @@ const struct brw_tracked_state brw_invarient_state = {
    .update = upload_invarient_state
 };
 
-
-/* State pool addresses:
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
  */
 static void upload_state_base_address( struct brw_context *brw )
 {
    struct intel_context *intel = &brw->intel;
-   struct brw_state_base_address sba;
-      
-   memset(&sba, 0, sizeof(sba));
-
-   sba.header.opcode = CMD_STATE_BASE_ADDRESS;
-   sba.header.length = 0x4;
-
-   /* BRW_NEW_LOCK */
-   sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5;
-   sba.bits0.modify_enable = 1;
-
-   /* BRW_NEW_LOCK */
-   sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5;
-   sba.bits1.modify_enable = 1;
 
-   sba.bits2.modify_enable = 1;
-   sba.bits3.modify_enable = 1;
-   sba.bits4.modify_enable = 1;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &sba);
+   /* Output the structure (brw_state_base_address) directly to the
+    * batchbuffer, so we can emit relocations inline.
+    */
+   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+   OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+	     1); /* General state base address */
+   OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+	     1); /* Surface state base address */
+   OUT_BATCH(1); /* Indirect object base address */
+   OUT_BATCH(1); /* General state upper bound */
+   OUT_BATCH(1); /* Indirect object upper bound */
+   ADVANCE_BATCH();
 }
 
 
 const struct brw_tracked_state brw_state_base_address = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
-   },
+   .always_update = GL_TRUE,
    .update = upload_state_base_address
 };
-- 
cgit v1.2.3