summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_program.c 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c 133
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 122
5 files changed, 288 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a020b621d6..01e07c967f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -167,6 +167,9 @@ struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
+
+ /** Program constant buffer/surface */
+ dri_bo *const_buffer;
};
@@ -238,8 +241,16 @@ struct brw_vs_ouput_sizes {
};
+/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
-#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS
+
+/**
+ * Size of our surface binding table.
+ * This contains pointers to the drawing surfaces and current texture
+ * objects and shader constant buffer (+1).
+ */
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
enum brw_cache_id {
BRW_CC_VP,
@@ -513,8 +524,8 @@ struct brw_context
/* BRW_NEW_CURBE_OFFSETS:
*/
struct {
- GLuint wm_start;
- GLuint wm_size;
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
GLuint clip_start;
GLuint clip_size;
GLuint vs_start;
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 545dedd34b..39a8610952 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -38,6 +38,7 @@
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
+#include "intel_regions.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
@@ -251,6 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+ /* XXX just use a memcpy here */
for (i = 0; i < nr; i++) {
const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
buf[offset + i * 4 + 0] = value[0];
@@ -330,11 +332,40 @@ static void prepare_constant_buffer(struct brw_context *brw)
}
+/**
+ * Upload the current fragment program's constants into its constant
+ * buffer object (fp->const_buffer), which is treated as a pseudo 1D
+ * texture holding one float[4] per program parameter.
+ * Only fragment shader constants are handled here.
+ *
+ * Programs without parameters are skipped and do not need a buffer.
+ * When asserts are compiled out, a missing or undersized buffer, or a
+ * failed mapping, leaves the buffer untouched rather than being
+ * dereferenced or overrun.
+ */
+static void
+update_texture_constant_buffer(struct brw_context *brw)
+{
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+   /* bytes needed: one float[4] per parameter */
+   const size_t size = (size_t) params->NumParameters * 4 * sizeof(GLfloat);
+
+   /* nothing to upload */
+   if (size == 0)
+      return;
+
+   assert(fp->const_buffer);
+   assert(fp->const_buffer->size >= size);
+   if (!fp->const_buffer || fp->const_buffer->size < size)
+      return;
+
+   /* copy constants into the buffer */
+   if (dri_bo_map(fp->const_buffer, GL_TRUE) != 0)
+      return;
+   memcpy(fp->const_buffer->virtual, params->ParameterValues, size);
+   dri_bo_unmap(fp->const_buffer);
+}
+
+
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
+ update_texture_constant_buffer(brw);
+
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index d90bd82038..457bc2fc7f 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -111,6 +111,8 @@ static void brwProgramStringNotify( GLcontext *ctx,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = &brw->intel;
+
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_fragment_program *newFP = brw_fragment_program(fprog);
@@ -126,6 +128,24 @@ static void brwProgramStringNotify( GLcontext *ctx,
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
newFP->isGLSL = brw_wm_is_glsl(fprog);
+
+ /* alloc constant buffer/surface */
+ {
+ const struct gl_program_parameter_list *params = prog->Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+ /* free old const buffer if too small */
+ if (newFP->const_buffer && newFP->const_buffer->size < size) {
+ dri_bo_unreference(newFP->const_buffer);
+ newFP->const_buffer = NULL;
+ }
+
+ if (!newFP->const_buffer) {
+ newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr,
+ "fp_const_buffer",
+ size, 64);
+ }
+ }
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 67ba59c325..28ef84e4aa 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -192,28 +192,41 @@ static void prealloc_reg(struct brw_wm_compile *c)
/* constants */
{
const int nr_params = c->fp->program.Base.Parameters->NumParameters;
- const struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
-
- /* number of float constants */
- c->prog_data.nr_params = 4 * nr_params;
-
- /* loop over program constants (float[4]) */
- for (i = 0; i < nr_params; i++) {
- /* loop over XYZW channels */
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
- /* Save pointer to parameter/constant value.
- * Constants will be copied in prepare_constant_buffer()
- */
- c->prog_data.param[index] = &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- /* number of constant regs used (each reg is float[8]) */
- c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
- c->reg_index += c->nr_creg;
+
+ if (1 /* XXX threshold: nr_params <= 8 */) {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ c->reg_index += c->nr_creg;
+ }
+ else {
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+
+ /* When there are a lot of FP constants we'll store them in a
+ * texture-like buffer instead of using the CURBE buffer.
+ * This means we won't use GRF registers for constants and we'll
+ * have to fetch constants with a dataport read.
+ */
+ }
}
/* fragment shader inputs */
@@ -892,6 +905,19 @@ static void emit_add(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+/**
+ * Emit code for ARL: move channel 0 of the first source operand into
+ * the address register, honoring the instruction's saturate mode.
+ * Saturation is switched back off before returning.
+ */
+static void emit_arl(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   /* destination: address register 0, viewed as eight unsigned words */
+   const struct brw_reg address = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                              BRW_ARF_ADDRESS, 0);
+   struct brw_reg value;
+
+   if (inst->SaturateMode != SATURATE_OFF)
+      brw_set_saturate(p, 1);
+   else
+      brw_set_saturate(p, 0);
+
+   value = get_src_reg(c, &inst->SrcReg[0], 0); /* channel 0 */
+   brw_MOV(p, address, value);
+
+   brw_set_saturate(p, 0);
+}
+
static void emit_sub(struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -2331,9 +2357,10 @@ static void emit_txb(struct brw_wm_compile *c,
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
GLuint i;
+
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
for (i = 0; i < 4; i++)
dst[i] = get_dst_reg(c, inst, i);
for (i = 0; i < 4; i++)
@@ -2372,13 +2399,13 @@ static void emit_txb(struct brw_wm_compile *c,
0); /* eot */
}
+
static void emit_tex(struct brw_wm_compile *c,
struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
GLuint msg_len;
GLuint i, nr;
GLuint emit;
@@ -2419,7 +2446,7 @@ static void emit_tex(struct brw_wm_compile *c,
}
if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bais */
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
@@ -2439,6 +2466,49 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, dst[3], brw_imm_f(1.0));
}
+
+/**
+ * Fetch one float[4] constant from the fragment shader constant buffer
+ * surface with a data port read and copy it into the destination of
+ * \p inst.  Only the channels enabled in the instruction's write mask
+ * are looked up and written.
+ *
+ * \param brw         currently unused
+ * \param c           WM compile state (temporaries are taken from it)
+ * \param inst        instruction whose destination receives the constant
+ * \param constIndex  index of the float[4] constant; 16 bytes per entry
+ */
+static void emit_get_constant(struct brw_context *brw,
+                              struct brw_wm_compile *c,
+                              struct prog_instruction *inst,
+                              GLuint constIndex)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = inst->DstReg.WriteMask;
+   struct brw_reg dst[4];
+   GLuint i;
+   const int mark = mark_tmps( c );
+   struct brw_reg writeback_reg[4];
+
+   (void) brw;
+
+   /* XXX only need 1 temp reg??? (only writeback_reg[0] is read below) */
+   for (i = 0; i < 4; i++) {
+      writeback_reg[i] = alloc_tmp(c);
+   }
+
+   /* Look up destination registers for the enabled channels only */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1 << i))
+         dst[i] = get_dst_reg(c, inst, i);
+   }
+
+   /* Get float[4] vector from constant buffer */
+   brw_dp_READ_4(p,
+                 writeback_reg[0],       /* first writeback dest */
+                 1,                      /* msg_reg */
+                 GL_FALSE,               /* rel addr? */
+                 16 * constIndex,        /* byte offset */
+                 BRW_WM_MAX_SURF - 1     /* surface, binding table index */
+                 );
+
+   /* Extract the enabled channel values, smear across dest registers */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1 << i)) {
+         /* extract 1 float from the writeback reg */
+         struct brw_reg new_src = stride(writeback_reg[0], 0, 1, 0);
+         new_src.subnr = i * 4;   /* byte offset of channel i */
+         /* and smear it into the dest register */
+         brw_MOV(p, dst[i], new_src);
+      }
+   }
+
+   release_tmps( c, mark );
+}
+
+
/**
* Resolve subroutine calls after code emit is done.
*/
@@ -2504,6 +2574,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_ADD:
emit_add(c, inst);
break;
+ case OPCODE_ARL:
+ emit_arl(c, inst);
+ break;
case OPCODE_SUB:
emit_sub(c, inst);
break;
@@ -2520,7 +2593,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_trunc(c, inst);
break;
case OPCODE_MOV:
+#if 0
+ /* test hook for new constant buffer code */
+ if (inst->SrcReg[0].File == PROGRAM_UNIFORM) {
+ emit_get_constant(brw, c, inst, inst->SrcReg[0].Index);
+ }
+ else {
+ emit_mov(c, inst);
+ }
+#else
emit_mov(c, inst);
+#endif
break;
case OPCODE_DP3:
emit_dp3(c, inst);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 1fc537ca20..e7d55d5dbd 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -33,6 +33,7 @@
#include "main/mtypes.h"
#include "main/texformat.h"
#include "main/texstore.h"
+#include "shader/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
@@ -287,6 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
struct brw_wm_surface_key key;
+ const GLuint j = MAX_DRAW_BUFFERS + unit;
memset(&key, 0, sizeof(key));
@@ -313,16 +315,111 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.cpp = intelObj->mt->cpp;
key.tiling = intelObj->mt->region->tiling;
- dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
- brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) {
- brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key);
+ dri_bo_unreference(brw->wm.surf_bo[j]);
+ brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[j] == NULL) {
+ brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key);
}
}
+
+
+/**
+ * Create the constant buffer surface.  Fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ *
+ * Fills in a buffer-type surface state of format R32G32B32A32_FLOAT,
+ * uploads it to the state cache and, when the key carries a buffer
+ * object, emits a relocation for the surface base address.
+ *
+ * \param brw  context owning the state cache
+ * \param key  surface description (bo, width, pitch, cpp, tiling)
+ * \return the buffer object returned by brw_upload_cache()
+ */
+static dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_wm_surface_key *key )
+{
+   /* the size is programmed as (width - 1), split over three fields */
+   const GLint last = key->width - 1;
+   struct brw_surface_state state;
+   dri_bo *surf_bo;
+
+   memset(&state, 0, sizeof(state));
+
+   state.ss0.surface_type = BRW_SURFACE_BUFFER;
+   state.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+   state.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+
+   /* Base address of the constant data */
+   assert(key->bo);
+   state.ss1.base_addr = key->bo ? key->bo->offset /* reloc */
+                                 : key->offset;
+
+   state.ss2.width = last & 0x7f;            /* bits 6:0 of size or width */
+   state.ss2.height = (last >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   state.ss3.depth = (last >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   state.ss3.pitch = (key->pitch * key->cpp) - 1;
+   brw_set_surface_tiling(&state, key->tiling);
+
+   surf_bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+                              key, sizeof(*key),
+                              &key->bo, key->bo ? 1 : 0,
+                              &state, sizeof(state),
+                              NULL, NULL);
+
+   if (!key->bo)
+      return surf_bo;
+
+   /* Emit relocation to surface contents */
+   dri_bo_emit_reloc(surf_bo,
+                     I915_GEM_DOMAIN_SAMPLER, 0,
+                     0,
+                     offsetof(struct brw_surface_state, ss1),
+                     key->bo);
+
+   return surf_bo;
+}
+
+
+/**
+ * Update the constant buffer surface.
+ *
+ * Describes the fragment program's constant buffer as a surface key
+ * (one RGBA float32 entry per program parameter), then looks the
+ * surface state up in the cache, creating it on a miss.  The result is
+ * stored in the last slot (BRW_WM_MAX_SURF - 1) of brw->wm.surf_bo.
+ *
+ * \param ctx  GL context
+ * \param fp   fragment program whose const_buffer backs the surface
+ */
+static void
+brw_update_constant_surface( GLcontext *ctx,
+                             const struct brw_fragment_program *fp )
+{
+   struct brw_context *brw = brw_context(ctx);
+   const GLuint nr_consts = fp->program.Base.Parameters->NumParameters;
+   dri_bo **slot = &brw->wm.surf_bo[BRW_WM_MAX_SURF - 1];
+   struct brw_wm_surface_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* backing storage and format */
+   key.bo = fp->const_buffer;
+   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.internal_format = GL_RGBA;
+   key.depthmode = GL_NONE;
+
+   /* dimensions: a single row of nr_consts entries, 16 bytes each */
+   key.width = nr_consts;
+   key.pitch = nr_consts;
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /* drop the previous surface state, then look up or build the new one */
+   dri_bo_unreference(*slot);
+   *slot = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+                            &key, sizeof(key),
+                            &key.bo, key.bo ? 1 : 0,
+                            NULL);
+   if (*slot == NULL) {
+      *slot = brw_create_constant_surface(brw, &key);
+   }
+}
+
+
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
@@ -514,6 +611,17 @@ static void prepare_wm_surfaces(struct brw_context *brw )
}
}
+ /* Update surface for fragment shader constant buffer */
+ {
+ const GLuint j = BRW_WM_MAX_SURF - 1;
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+
+ brw_update_constant_surface(ctx, fp);
+ brw->wm.nr_surfaces = j + 1;
+ }
+
+
dri_bo_unreference(brw->wm.bind_bo);
brw->wm.bind_bo = brw_wm_get_binding_table(brw);