summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h54
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c53
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c84
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h25
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c159
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c50
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex_layout.c70
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_constval.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c299
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c226
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c21
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c493
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass2.c21
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c294
l---------src/mesa/drivers/dri/i965/intel_generatemipmap.c1
34 files changed, 1435 insertions, 583 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 2934414d99..9712c38725 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -14,6 +14,7 @@ DRIVER_SOURCES = \
intel_decode.c \
intel_extensions.c \
intel_fbo.c \
+ intel_generatemipmap.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_screen.c \
@@ -69,6 +70,7 @@ DRIVER_SOURCES = \
brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
+ brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_debug.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index d96ff29310..4dbe551d83 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -118,6 +118,8 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = (1<<12);
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
/* if conformance mode is set, swrast can handle any size AA point */
ctx->Const.MaxPointSizeAA = 255.0;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 01e07c967f..873fc8ffff 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -131,6 +131,7 @@ struct brw_context;
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_INPUT_VARYING 0x200
#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
@@ -141,7 +142,8 @@ struct brw_context;
#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
#define BRW_NEW_DEPTH_BUFFER 0x20000
-#define BRW_NEW_NR_SURFACES 0x40000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -159,6 +161,8 @@ struct brw_state_flags {
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
};
@@ -168,8 +172,8 @@ struct brw_fragment_program {
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
- /** Program constant buffer/surface */
- dri_bo *const_buffer;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
};
@@ -186,7 +190,7 @@ struct brw_wm_prog_data {
GLuint total_grf;
GLuint total_scratch;
- GLuint nr_params;
+ GLuint nr_params; /**< number of float params/constants */
GLboolean error;
/* Pointer to tracked values (only valid once
@@ -225,6 +229,7 @@ struct brw_vs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
GLuint outputs_written;
+ GLuint nr_params; /**< number of float params/constants */
GLuint inputs_read;
@@ -241,16 +246,38 @@ struct brw_vs_ouput_sizes {
};
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
/**
- * Size of our surface binding table.
+ * Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
- * objects and shader constant buffer (+1).
+ * objects and shader constant buffers (+2).
*/
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define SURF_INDEX_DRAW(d) (d)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
enum brw_cache_id {
BRW_CC_VP,
@@ -427,8 +454,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
- struct brw_tracked_state **atoms;
- GLuint nr_atoms;
GLuint nr_color_regions;
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
@@ -448,7 +473,8 @@ struct brw_context
int validated_bo_count;
} state;
- struct brw_cache cache;
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
struct brw_cached_batch_item *cached_batch_items;
struct {
@@ -532,11 +558,6 @@ struct brw_context
GLuint vs_size;
GLuint total_size;
- /* Dynamic tracker which changes to reflect the state referenced
- * by active fp and vp program parameters:
- */
- struct brw_tracked_state tracked_state;
-
dri_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;
@@ -557,6 +578,11 @@ struct brw_context
dri_bo *prog_bo;
dri_bo *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ dri_bo *bind_bo;
+ dri_bo *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
} vs;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index a6bfb7507e..a1a6c53d0e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -36,6 +36,7 @@
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
@@ -45,17 +46,21 @@
#include "brw_util.h"
-/* Partition the CURBE between the various users of constant values:
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers. We no longer allocatea block of the GRF for
+ * constants. That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
/* CACHE_NEW_WM_PROG */
- GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
- const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program);
- GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16;
+ const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
@@ -184,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLfloat *buf;
GLuint i;
- /* Update our own dependency flags. This works because this
- * function will also be called whenever fp or vp changes.
- */
- brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
- brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
-
if (sz == 0) {
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
@@ -248,7 +246,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
/* vertex shader constants */
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
- GLuint nr = vp->program.Base.Parameters->NumParameters;
+ GLuint nr = brw->vs.prog_data->nr_params / 4;
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
@@ -331,40 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
*/
}
-
-/**
- * Vertex/fragment shader constants are stored in a pseudo 1D texture.
- * This function updates the constants in that buffer.
- */
-static void
-update_texture_constant_buffer(struct brw_context *brw)
-{
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- assert(fp->const_buffer);
- assert(fp->const_buffer->size >= size);
-
- /* copy constants into the buffer */
- if (size > 0) {
- GLubyte *map;
- dri_bo_map(fp->const_buffer, GL_TRUE);
- map = fp->const_buffer->virtual;
- memcpy(map, params->ParameterValues, size);
- dri_bo_unmap(fp->const_buffer);
- }
-}
-
-
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- update_texture_constant_buffer(brw);
-
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
@@ -386,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
- .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
+ .mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index b91b20bec6..1b8bcc14ec 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
@@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- GLuint i = _mesa_ffsll(tmp)-1;
+ while (vs_inputs) {
+ GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
+ vs_inputs &= ~(1 << i);
enabled[nr_enabled++] = input;
}
@@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint i;
GLuint nr_enabled = 0;
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- i = _mesa_ffsll(tmp)-1;
+ while (vs_inputs) {
+ i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
+ vs_inputs &= ~(1 << i);
enabled[nr_enabled++] = input;
}
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index d05f2e6c41..bc7756ceab 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -97,7 +97,7 @@ struct brw_glsl_call;
#define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 4000
+#define BRW_EU_MAX_INSN 10000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
@@ -862,9 +862,17 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
- GLuint scratch_offset,
+ GLuint location,
GLuint bind_table_index );
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 21ce8369db..60ea44f7a9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p,
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
- * Scratch offset should be a multiple of 16.
+ * Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
* If relAddr is true, we'll do an indirect fetch using the address register.
*/
@@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
- GLuint scratch_offset,
+ GLuint location,
GLuint bind_table_index )
{
{
@@ -969,18 +969,20 @@ void brw_dp_READ_4( struct brw_compile *p,
brw_set_mask_control(p, BRW_MASK_DISABLE);
/* set message header global offset field (reg 0, element 2) */
+ /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */
brw_MOV(p,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
- brw_imm_d(scratch_offset));
+ brw_imm_d(location));
brw_pop_insn_state(p);
}
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- insn->header.predicate_control = 0; /* XXX */
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
@@ -989,7 +991,7 @@ void brw_dp_READ_4( struct brw_compile *p,
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
brw_set_dp_read_message(insn,
- bind_table_index, /* binding table index (255=stateless) */
+ bind_table_index,
0, /* msg_control (0 means 1 Oword) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
0, /* source cache = data cache */
@@ -1000,6 +1002,78 @@ void brw_dp_READ_4( struct brw_compile *p,
}
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index)
+{
+ GLuint msg_reg_nr = 1;
+
+ assert(oword < 2);
+ /*
+ printf("vs const read msg, location %u, msg_reg_nr %d\n",
+ location, msg_reg_nr);
+ */
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ {
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(insn,
+ bind_table_index,
+ oword, /* 0 = lower Oword, 1 = upper Oword */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5c94a49f60..4784254bc7 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
static void prepare_binding_table_pointers(struct brw_context *brw)
{
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
brw_add_validated_bo(brw, brw->wm.bind_bo);
}
@@ -117,13 +118,14 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- OUT_BATCH(0); /* vs */
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ else
+ OUT_BATCH(0);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_RELOC(brw->wm.bind_bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- 0);
+ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 457bc2fc7f..bac69187c1 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -95,6 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx,
static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
+ if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+ dri_bo_unreference(brw_fprog->const_buffer);
+ }
+
_mesa_delete_program( ctx, prog );
}
@@ -111,7 +117,6 @@ static void brwProgramStringNotify( GLcontext *ctx,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
- struct intel_context *intel = &brw->intel;
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
@@ -128,24 +133,6 @@ static void brwProgramStringNotify( GLcontext *ctx,
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
newFP->isGLSL = brw_wm_is_glsl(fprog);
-
- /* alloc constant buffer/surface */
- {
- const struct gl_program_parameter_list *params = prog->Parameters;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- /* free old const buffer if too small */
- if (newFP->const_buffer && newFP->const_buffer->size < size) {
- dri_bo_unreference(newFP->const_buffer);
- newFP->const_buffer = NULL;
- }
-
- if (!newFP->const_buffer) {
- newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr,
- "fp_const_buffer",
- size, 64);
- }
- }
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index fc4eddda0a..c99918724b 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw)
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
- /* _NEW_SCISSOR */
+ /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+ * for DrawBuffer->_[XY]{min,max}
+ */
/* The scissor only needs to handle the intersection of drawable and
* scissor rect. Clipping to the boundaries of static shared buffers
@@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw)
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
.mesa = (_NEW_VIEWPORT |
- _NEW_SCISSOR),
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
.brw = 0,
.cache = 0
},
@@ -147,7 +150,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->line_smooth = ctx->Line.SmoothFlag;
key->point_sprite = ctx->Point.PointSprite;
- key->point_size = ctx->Point.Size;
+ key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
key->point_attenuated = ctx->Point._Attenuated;
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 81b0a45998..bf9f6cae55 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp;
const struct brw_tracked_state brw_state_base_address;
const struct brw_tracked_state brw_urb_fence;
const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
const struct brw_tracked_state brw_vs_prog;
const struct brw_tracked_state brw_vs_unit;
const struct brw_tracked_state brw_wm_input_sizes;
const struct brw_tracked_state brw_wm_prog;
const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
const struct brw_tracked_state brw_wm_surfaces;
const struct brw_tracked_state brw_wm_unit;
@@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+ GLenum target, depthmode;
+ dri_bo *bo;
+ GLint format, internal_format;
+ GLint first_level, last_level;
+ GLint width, height, depth;
+ GLint pitch, cpp;
+ uint32_t tiling;
+ GLuint offset;
+};
+
/***********************************************************************
* brw_state.c
*/
@@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
-void brw_init_cache( struct brw_context *brw );
-void brw_destroy_cache( struct brw_context *brw );
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
@@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
void brw_destroy_batch_cache( struct brw_context *brw );
void brw_clear_batch_cache_flush( struct brw_context *brw );
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key );
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index d5b5166406..e40d7a0416 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -56,9 +56,9 @@
* incorrect program is run for the other instance.
*/
+#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "main/imports.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
@@ -69,8 +69,10 @@
#include "brw_sf.h"
#include "brw_gs.h"
-static GLuint hash_key( const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
GLuint *ikey = (GLuint *)key;
GLuint hash = 0, i;
@@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
return hash;
}
+
/**
* Marks a new buffer as being chosen for the given cache id.
*/
@@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
+
static struct brw_cache_item *
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
GLuint hash, const void *key, GLuint key_size,
@@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
}
-static void rehash( struct brw_cache *cache )
+static void
+rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
@@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
cache->size = size;
}
+
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
-dri_bo *brw_search_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
- void *aux_return )
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+ void *aux_return)
{
struct brw_cache_item *item;
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
return item->bo;
}
+
dri_bo *
brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
@@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
-/* This doesn't really work with aux data. Use search/upload instead
+
+/**
+ * This doesn't really work with aux data. Use search/upload instead
*/
dri_bo *
brw_cache_data_sz(struct brw_cache *cache,
@@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
+
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
*
@@ -319,21 +330,22 @@ enum pool_type {
DW_GENERAL_STATE
};
+
static void
-brw_init_cache_id( struct brw_context *brw,
- const char *name,
- enum brw_cache_id id,
- GLuint key_size,
- GLuint aux_size)
+brw_init_cache_id(struct brw_cache *cache,
+ const char *name,
+ enum brw_cache_id id,
+ GLuint key_size,
+ GLuint aux_size)
{
- struct brw_cache *cache = &brw->cache;
-
cache->name[id] = strdup(name);
cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
-void brw_init_cache( struct brw_context *brw )
+
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
@@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
- _mesa_calloc(cache->size *
- sizeof(struct brw_cache_item));
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
sizeof(struct brw_cc_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
sizeof(struct brw_cc_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
sizeof(struct brw_sampler_default_color),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
0, /* variable key/data size */
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
sizeof(struct brw_wm_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
sizeof(struct brw_sf_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
sizeof(struct brw_sf_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
sizeof(struct brw_vs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
sizeof(struct brw_clip_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
sizeof(struct brw_gs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->surface_cache;
+
+ cache->brw = brw;
- brw_init_cache_id(brw,
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
sizeof(struct brw_surface_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
0,
0);
}
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+ brw_init_non_surface_cache(brw);
+ brw_init_surface_cache(brw);
+}
+
+
static void
-brw_clear_cache( struct brw_context *brw )
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
@@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
- for (i = 0; i < brw->cache.size; i++) {
- for (c = brw->cache.items[i]; c; c = next) {
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
int j;
next = c->next;
@@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
free((void *)c->key);
free(c);
}
- brw->cache.items[i] = NULL;
+ cache->items[i] = NULL;
}
- brw->cache.n_items = 0;
+ cache->n_items = 0;
if (brw->curbe.last_buf) {
_mesa_free(brw->curbe.last_buf);
@@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw )
brw->state.dirty.cache |= ~0;
}
-void brw_state_cache_check_size( struct brw_context *brw )
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
{
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
*/
if (brw->cache.n_items > 1000)
- brw_clear_cache(brw);
+ brw_clear_cache(brw, &brw->cache);
+
+ if (brw->surface_cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->surface_cache);
}
-void brw_destroy_cache( struct brw_context *brw )
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
GLuint i;
- brw_clear_cache(brw);
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
- dri_bo_unreference(brw->cache.last_bo[i]);
- free(brw->cache.name[i]);
+ dri_bo_unreference(cache->last_bo[i]);
+ free(cache->name[i]);
}
- free(brw->cache.items);
- brw->cache.items = NULL;
- brw->cache.size = 0;
+ free(cache->items);
+ cache->items = NULL;
+ cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+ brw_destroy_cache(brw, &brw->cache);
+ brw_destroy_cache(brw, &brw->surface_cache);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 5d332d010c..a713262269 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype)
}
}
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+ switch (surface_format) {
+ case 0x000: return "r32g32b32a32_float";
+ case 0x0c1: return "b8g8r8a8_unorm";
+ case 0x100: return "b5g6r5_unorm";
+ case 0x102: return "b5g5r5a1_unorm";
+ case 0x104: return "b4g4r4a4_unorm";
+ default: return "unknown";
+ }
+}
+
static void dump_wm_surface_state(struct brw_context *brw)
{
int i;
@@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
char name[20];
if (surf_bo == NULL) {
- fprintf(stderr, "WM SS%d: NULL\n", i);
+ fprintf(stderr, " WM SS%d: NULL\n", i);
continue;
}
dri_bo_map(surf_bo, GL_FALSE);
@@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw)
surf = (struct brw_surface_state *)(surf_bo->virtual);
sprintf(name, "WM SS%d", i);
- state_out(name, surf, surfoff, 0, "%s\n",
- get_965_surfacetype(surf->ss0.surface_type));
+ state_out(name, surf, surfoff, 0, "%s %s\n",
+ get_965_surfacetype(surf->ss0.surface_type),
+ get_965_surface_format(surf->ss0.surface_format));
state_out(name, surf, surfoff, 1, "offset\n");
state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 5de1450e61..c6dfea4743 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
-
&brw_cc_vp,
&brw_cc_unit,
- &brw_wm_surfaces, /* must do before samplers */
+ &brw_vs_surfaces, /* must do before unit */
+ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
&brw_wm_samplers,
&brw_wm_unit,
@@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] =
&brw_line_stipple,
&brw_aa_line_parameters,
- /* Ordering of the commands below is documented as fixed.
- */
-#if 0
- &brw_pipelined_state_pointers,
- &brw_urb_fence,
- &brw_constant_buffer_state,
-#else
+
&brw_psp_urb_cbs,
-#endif
&brw_drawing_rect,
&brw_indices,
&brw_vertices,
- NULL, /* brw_constant_buffer */
+ &brw_constant_buffer
};
void brw_init_state( struct brw_context *brw )
{
- GLuint i;
-
- brw_init_cache(brw);
-
- brw->state.atoms = _mesa_malloc(sizeof(atoms));
- brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
- _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
-
- /* Patch in a pointer to the dynamic state atom:
- */
- for (i = 0; i < brw->state.nr_atoms; i++)
- if (brw->state.atoms[i] == NULL)
- brw->state.atoms[i] = &brw->curbe.tracked_state;
-
- _mesa_memcpy(&brw->curbe.tracked_state,
- &brw_constant_buffer,
- sizeof(brw_constant_buffer));
+ brw_init_caches(brw);
}
void brw_destroy_state( struct brw_context *brw )
{
- if (brw->state.atoms) {
- _mesa_free(brw->state.atoms);
- brw->state.atoms = NULL;
- }
-
- brw_destroy_cache(brw);
+ brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
}
@@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
+ DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
{0, 0, 0}
};
@@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw )
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
@@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw)
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < brw->state.nr_atoms; i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.mesa ||
@@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw)
}
else {
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 51a617fcb4..5c5455813a 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -28,7 +28,6 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
/* Code to layout images in a mipmap tree for i965.
*/
@@ -40,14 +39,15 @@
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
+GLboolean brw_miptree_layout(struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling)
{
- /* XXX: these vary depending on image format:
- */
-/* GLint align_w = 4; */
+ /* XXX: these vary depending on image format: */
+ /* GLint align_w = 4; */
switch (mt->target) {
- case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_3D: {
GLuint width = mt->width0;
GLuint height = mt->height0;
@@ -59,25 +59,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
GLuint align_w = 4;
mt->total_height = 0;
-
+
if (mt->compressed) {
align_w = intel_compressed_alignment(mt->internal_format);
mt->pitch = ALIGN(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else {
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
- pack_y_pitch = ALIGN(mt->height0, align_h);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+ pack_y_pitch = ALIGN(mt->height0, align_h);
}
pack_x_pitch = mt->pitch;
pack_x_nr = 1;
- for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+ for (level = mt->first_level ; level <= mt->last_level ; level++) {
GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
GLint x = 0;
GLint y = 0;
GLint q, j;
-
+
intel_miptree_set_level_info(mt, level, nr_images,
0, mt->total_height,
width, height, depth);
@@ -89,7 +89,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
}
x = 0;
- y += pack_y_pitch;
+ y += pack_y_pitch;
}
@@ -98,40 +98,40 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
height = minify(height);
depth = minify(depth);
- if (mt->compressed) {
- pack_y_pitch = (height + 3) / 4;
-
- if (pack_x_pitch > ALIGN(width, align_w)) {
- pack_x_pitch = ALIGN(width, align_w);
- pack_x_nr <<= 1;
- }
- } else {
- if (pack_x_pitch > 4) {
- pack_x_pitch >>= 1;
- pack_x_nr <<= 1;
- assert(pack_x_pitch * pack_x_nr <= mt->pitch);
- }
-
- if (pack_y_pitch > 2) {
- pack_y_pitch >>= 1;
- pack_y_pitch = ALIGN(pack_y_pitch, align_h);
- }
- }
+ if (mt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > ALIGN(width, align_w)) {
+ pack_x_pitch = ALIGN(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ }
+ }
}
break;
}
default:
- i945_miptree_layout_2d(intel, mt);
+ i945_miptree_layout_2d(intel, mt, tiling);
break;
}
- DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
- mt->pitch,
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ mt->pitch,
mt->total_height,
mt->cpp,
mt->pitch * mt->total_height * mt->cpp );
-
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 99d0e93722..1e4f66091e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -75,6 +75,11 @@ struct brw_vs_compile {
struct brw_reg userplane[6];
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
};
void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index d29eb17f8c..2637344b48 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -96,7 +96,7 @@ static GLubyte get_active( struct tracker *t,
struct prog_src_register src )
{
GLuint i;
- GLubyte active = src.NegateBase; /* NOTE! */
+ GLubyte active = src.Negate; /* NOTE! */
if (src.RelAddr)
return 0xf;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0d6c6ab9a8..d7f75e3685 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -38,14 +38,49 @@
#include "brw_vs.h"
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
-/* Do things as simply as possible. Allocate and populate all regs
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
- GLuint nr_params;
+
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The later is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->program.Base.Parameters->NumParameters +
+ c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
+ else
+ c->vp->use_const_buffer = GL_FALSE;
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* r0 -- reserved as usual
*/
@@ -66,13 +101,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
- nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
+ if (c->vp->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ }
+ else {
+ /* use a section of the GRF for constants */
+ GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
- c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = nr_params * 4;
+ }
/* Allocate input regs:
*/
@@ -133,6 +177,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
for (i = 0; i < 128; i++) {
if (c->output_regs[i].used_in_src) {
c->output_regs[i].reg = brw_vec8_grf(reg, 0);
@@ -165,28 +217,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
- struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
-
- if (++c->last_tmp > c->prog_data.total_grf)
- c->prog_data.total_grf = c->last_tmp;
-
- return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
- if (tmp.nr == c->last_tmp-1)
- c->last_tmp--;
-}
-
-static void release_tmps( struct brw_vs_compile *c )
-{
- c->last_tmp = c->first_tmp;
-}
-
-
/**
* If an instruction uses a temp reg both as a src and the dest, we
* sometimes need to allocate an intermediate temporary.
@@ -633,6 +663,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
}
brw_ENDIF(p, if_insn);
+
+ release_tmp(c, tmp);
}
static void emit_lrp_noalias(struct brw_vs_compile *c,
@@ -673,13 +705,84 @@ static void emit_nrm( struct brw_vs_compile *c,
}
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+ const GLboolean relAddr = src->RelAddr;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != src->Index || relAddr) {
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ c->current_const[argIndex].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (relAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (relAddr) {
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
gl_register_file file,
GLuint index )
{
-
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@@ -708,13 +811,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -735,10 +842,67 @@ static struct brw_reg deref( struct brw_vs_compile *c,
brw_pop_insn_state(p);
}
+ /* NOTE: tmp not released */
return vec8(tmp);
}
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ if (c->vp->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
static void emit_arl( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -750,30 +914,31 @@ static void emit_arl( struct brw_vs_compile *c,
if (need_tmp)
tmp = get_tmp(c);
- brw_RNDD(p, tmp, arg0);
- brw_MUL(p, dst, tmp, brw_imm_d(16));
+ brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
if (need_tmp)
release_tmp(c, tmp);
}
-/* Will return mangled results for SWZ op. The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- struct prog_src_register *src )
+ const struct prog_instruction *inst,
+ GLuint argIndex )
{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr)
- reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- else
- reg = get_reg(c, src->File, src->Index);
+ reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -784,16 +949,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
/* Note this is ok for non-swizzle instructions:
*/
- reg.negate = src->NegateBase ? 1 : 0;
+ reg.negate = src->Negate ? 1 : 0;
return reg;
}
+/**
+ * Get brw register for the given program dest register.
+ */
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
- struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_ADDRESS:
+ assert(dst.Index == 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
reg.dw1.bits.writemask = dst.WriteMask;
@@ -803,14 +990,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
- struct prog_src_register src )
+ const struct prog_instruction *inst)
{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
GLuint src_mask = 0;
GLubyte src_swz[4];
- GLboolean need_tmp = (src.NegateBase &&
+ GLboolean need_tmp = (src.Negate &&
dst.file != BRW_GENERAL_REGISTER_FILE);
struct brw_reg tmp = dst;
GLuint i;
@@ -844,10 +1033,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src_mask) {
struct brw_reg arg0;
- if (src.RelAddr)
- arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
- else
- arg0 = get_reg(c, src.File, src.Index);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@@ -862,8 +1048,8 @@ static void emit_swz( struct brw_vs_compile *c,
if (ones_mask)
brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
- if (src.NegateBase)
- brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp));
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
if (need_tmp) {
brw_MOV(p, dst, tmp);
@@ -1039,21 +1225,26 @@ void brw_vs_emit(struct brw_vs_compile *c )
for (insn = 0; insn < nr_insns; insn++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
struct brw_reg args[3], dst;
GLuint i;
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
/* Get argument regs. SWZ is special and does this itself.
*/
if (inst->Opcode != OPCODE_SWZ)
for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
+ const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
- args[i] = get_arg(c, src);
+ args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@@ -1181,7 +1372,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
- emit_swz(c, dst, inst->SrcReg[0] );
+ emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 1a63766ea1..3d29538843 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -44,6 +44,8 @@ struct brw_vs_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
};
static void
@@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
key->nr_urb_entries = brw->urb.nr_vs_entries;
key->urb_size = brw->urb.vsize;
+ /* BRW_NEW_NR_VS_SURFACES */
+ key->nr_surfaces = brw->vs.nr_surfaces;
+
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
/* Note that we read in the userclip planes as well, hence
@@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000000..89f47522a1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+ assert(surf == 0);
+
+ /* If we're in this state update atom, we need to update VS constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(vp->const_buffer);
+ vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (vp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = NULL;
+ return;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = vp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
+ uint32_t *data = malloc(data_size);
+ int i;
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++)
+ if (brw->vs.surf_bo[i])
+ data[i] = brw->vs.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ /* The presumed offsets were set in the data values for
+ * brw_upload_cache.
+ */
+ drm_intel_bo_emit_reloc(bind_bo, i * 4,
+ brw->vs.surf_bo[i], 0,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+ }
+
+ free(data);
+ }
+
+ return bind_bo;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ int i;
+ int nr_surfaces = 0;
+
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ nr_surfaces = i + 1;
+ }
+ }
+
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
+
+ /* Note that we don't end up updating the bind_bo if we don't have a
+ * surface to be pointing at. This should be relatively harmless, as it
+ * just slightly increases our working set size.
+ */
+ if (brw->vs.nr_surfaces != 0) {
+ dri_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+ }
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 960bbb311e..ba03afd6c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
+ dri_bo_release(&brw->vs.bind_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 90d74c2885..3e476fd3be 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -146,6 +146,13 @@ static void do_wm_prog( struct brw_context *brw,
if (c == NULL) {
brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
c = brw->wm.compile_data;
+ if (c == NULL) {
+ /* Ouch - big out of memory problem. Can't continue
+ * without triggering a segfault, no way to signal,
+ * so just return.
+ */
+ return;
+ }
} else {
memset(c, 0, sizeof(*brw->wm.compile_data));
}
@@ -312,6 +319,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
key->drawable_height = brw->intel.driDrawable->h;
}
+ /* CACHE_NEW_VS_PROG */
+ key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+
/* The unique fragment program ID */
key->program_string_id = fp->id;
}
@@ -350,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_WM_INPUT_DIMENSIONS |
BRW_NEW_REDUCED_PRIMITIVE),
- .cache = 0
+ .cache = CACHE_NEW_VS_PROG,
},
.prepare = brw_prepare_wm_prog
};
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index d0ab3bdc65..fb15c03e83 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -75,6 +75,7 @@ struct brw_wm_prog_key {
GLuint program_string_id:32;
GLuint origin_x, origin_y;
GLuint drawable_height;
+ GLuint vp_outputs_written;
};
@@ -240,22 +241,25 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
/** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
struct brw_reg stack;
struct brw_reg emit_mask_reg;
- GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
- /** using a real constant buffer? */
- GLboolean use_const_buffer;
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
GLint index;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index d65b1332c6..14ab9042de 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -353,6 +353,19 @@ static void emit_mad( struct brw_compile *p,
}
}
+static void emit_trunc( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_RNDZ(p, dst[i], arg0[i]);
+ }
+ }
+}
static void emit_lrp( struct brw_compile *p,
const struct brw_reg *dst,
@@ -742,7 +755,7 @@ static void emit_tex( struct brw_wm_compile *c,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
(inst->tex_shadow ?
@@ -791,7 +804,7 @@ static void emit_txb( struct brw_wm_compile *c,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
@@ -1224,6 +1237,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
+ case OPCODE_TRUNC:
+ emit_trunc(p, dst, dst_flags, args[0]);
+ break;
+
case OPCODE_LRP:
emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index a7f5f1b9a2..1798d842c7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx)
reg.Index = idx;
reg.Swizzle = SWIZZLE_NOOP;
reg.RelAddr = 0;
- reg.NegateBase = 0;
+ reg.Negate = NEGATE_NONE;
reg.Abs = 0;
- reg.NegateAbs = 0;
return reg;
}
@@ -569,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c,
src_undef(),
src_undef());
/* Avoid letting negation flag of src0 affect our 1 constant. */
- swz->SrcReg[0].NegateBase &= ~NEGATE_X;
+ swz->SrcReg[0].Negate &= ~NEGATE_X;
}
if (dst.WriteMask & WRITEMASK_W) {
/* dst.w = mov src1.w
@@ -604,7 +603,7 @@ static void precalc_lit( struct brw_wm_compile *c,
src_undef(),
src_undef());
/* Avoid letting the negation flag of src0 affect our 1 constant. */
- swz->SrcReg[0].NegateBase = 0;
+ swz->SrcReg[0].Negate = NEGATE_NONE;
}
if (dst.WriteMask & WRITEMASK_YZ) {
@@ -651,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c,
src0,
src_undef(),
src_undef());
- out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Negate = NEGATE_NONE;
out->SrcReg[0].Abs = 1;
/* tmp0 = MAX(coord.X, coord.Y) */
@@ -1050,14 +1049,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
case OPCODE_ABS:
out = emit_insn(c, inst);
out->Opcode = OPCODE_MOV;
- out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Negate = NEGATE_NONE;
out->SrcReg[0].Abs = 1;
break;
case OPCODE_SUB:
out = emit_insn(c, inst);
out->Opcode = OPCODE_ADD;
- out->SrcReg[1].NegateBase ^= 0xf;
+ out->SrcReg[1].Negate ^= NEGATE_XYZW;
break;
case OPCODE_SCS:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 575cd45d57..0e6a2f8ef0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,5 +1,7 @@
#include "main/macros.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@@ -21,7 +23,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
- case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
@@ -42,6 +43,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
}
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
/**
* Record the mapping of a Mesa register to a hardware register.
*/
@@ -56,7 +134,7 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
* Examine instruction's write mask to find index of first component
* enabled for writing.
*/
-static int get_scalar_dst_index(struct prog_instruction *inst)
+static int get_scalar_dst_index(const struct prog_instruction *inst)
{
int i;
for (i = 0; i < 4; i++)
@@ -68,11 +146,23 @@ static int get_scalar_dst_index(struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- if(c->tmp_index == c->tmp_max)
- c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
-
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
return reg;
+
}
/**
@@ -130,35 +220,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
return brw_null_reg();
}
+ assert(index < 256);
+ assert(component < 4);
+
/* see if we've already allocated a HW register for this Mesa register */
if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
}
else {
/* no, allocate new register */
- reg = brw_vec8_grf(c->reg_index, 0);
- }
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
- /* if this is a new register allocation, record it in the table */
- if (!c->wm_regs[file][index][component].inited) {
- set_reg(c, file, index, component, reg);
- c->reg_index++;
- }
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
- if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
- /* ran out of temporary registers! */
-#if 1
- /* This is a big hack for now.
- * Return bad register index, just don't hang the GPU.
- */
- _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
- c->reg_index = BRW_WM_MAX_GRF - 13;
-#else
- return brw_null_reg();
-#endif
+ set_reg(c, file, index, component, reg);
}
-
+
if (neg & (1 << component)) {
reg = negate(reg);
}
@@ -168,6 +252,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
}
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[MAX_PROGRAM_TEMPS];
+ GLint intEnd[MAX_PROGRAM_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
/**
* Preallocate registers. This sets up the Mesa to hardware register
* mapping for certain registers, such as constants (uniforms/state vars)
@@ -177,8 +301,12 @@ static void prealloc_reg(struct brw_wm_compile *c)
{
int i, j;
struct brw_reg reg;
- int nr_interp_regs = 0;
+ int urb_read_length = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
if (i < c->key.nr_depth_regs)
@@ -187,16 +315,22 @@ static void prealloc_reg(struct brw_wm_compile *c)
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2 * c->key.nr_depth_regs;
+ reg_index += 2 * c->key.nr_depth_regs;
/* constants */
{
- const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
/* use a real constant buffer, or just use a section of the GRF? */
- c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
- if (c->use_const_buffer) {
+ if (c->fp->use_const_buffer) {
/* We'll use a real constant buffer and fetch constants from
* it with a dataport read message.
*/
@@ -216,7 +350,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (i = 0; i < nr_params; i++) {
/* loop over XYZW channels */
for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
/* Save pointer to parameter/constant value.
* Constants will be copied in prepare_constant_buffer()
*/
@@ -226,42 +360,62 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
/* number of constant regs used (each reg is float[8]) */
c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
- c->reg_index += c->nr_creg;
+ reg_index += c->nr_creg;
}
}
/* fragment shader inputs */
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (inputs & (1<<i)) {
- nr_interp_regs++;
- reg = brw_vec8_grf(c->reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- c->reg_index += 2;
- }
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.vp_outputs_written & (1 << i)) {
+ reg_index += 2;
+ }
}
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
- c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index += 2;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!
*/
- if (c->use_const_buffer) {
- c->current_const[0].reg = alloc_tmp(c);
- c->current_const[1].reg = alloc_tmp(c);
- c->current_const[2].reg = alloc_tmp(c);
- }
- /*
- printf("USE CONST BUFFER? %d\n", c->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
- */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
}
@@ -282,45 +436,21 @@ static void fetch_constants(struct brw_wm_compile *c,
if (src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
src->File == PROGRAM_UNIFORM) {
- if (c->current_const[i].index != src->Index) {
-
- c->current_const[i].index = src->Index;
- /*c->current_const[i].reg = alloc_tmp(c);*/
-
- /*
- printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, i, c->current_const[i].reg.nr);
- */
-
- /* need to fetch the constant now */
- brw_dp_READ_4(p,
- c->current_const[i].reg, /* writeback dest */
- 1, /* msg_reg */
- src->RelAddr, /* relative indexing? */
- 16 * src->Index, /* byte offset */
- BRW_WM_MAX_SURF - 1 /* binding table index */
- );
+ c->current_const[i].index = src->Index;
#if 0
- /* dependency stall */
- {
- int response_length = 1;
- int mark = mark_tmps( c );
- struct brw_reg src = c->current_const[i].reg;
- struct brw_reg tmp = alloc_tmp(c);
-
- /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
- */
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, tmp, src);
- brw_MOV(p, src, tmp);
- brw_pop_insn_state(p);
-
- release_tmps( c, mark );
- }
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
#endif
- }
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ 1, /* msg_reg */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
}
}
}
@@ -361,18 +491,18 @@ get_src_reg_const(struct brw_wm_compile *c,
const_reg = stride(const_reg, 0, 1, 0);
const_reg.subnr = component * 4;
- if (src->NegateBase)
+ if (src->Negate & (1 << component))
const_reg = negate(const_reg);
if (src->Abs)
const_reg = brw_abs(const_reg);
- /*
- printf(" form const[%d] for arg %d, comp %d, reg %d\n",
+#if 0
+ printf(" form const[%d].%d for arg %d, reg %d\n",
c->current_const[srcRegIndex].index,
- srcRegIndex,
component,
+ srcRegIndex,
const_reg.nr);
- */
+#endif
return const_reg;
}
@@ -389,7 +519,15 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
const GLuint nr = 1;
const GLuint component = GET_SWZ(src->Swizzle, channel);
- if (c->use_const_buffer &&
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
+ if (c->fp->use_const_buffer &&
(src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
src->File == PROGRAM_UNIFORM)) {
@@ -398,7 +536,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
else {
/* other type of source register */
return get_reg(c, src->File, src->Index, component, nr,
- src->NegateBase, src->Abs);
+ src->Negate, src->Abs);
}
}
@@ -423,11 +561,13 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
const GLfloat *param =
c->fp->program.Base.Parameters->ParameterValues[src->Index];
GLfloat value = param[component];
- if (src->NegateBase)
+ if (src->Negate & (1 << channel))
value = -value;
if (src->Abs)
value = FABSF(value);
- /*printf(" form imm reg %f\n", value);*/
+#if 0
+ printf(" form immed value %f for chan %d\n", value, channel);
+#endif
return brw_imm_f(value);
}
else {
@@ -501,7 +641,7 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
static void emit_abs( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -518,7 +658,7 @@ static void emit_abs( struct brw_wm_compile *c,
}
static void emit_trunc( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -536,7 +676,7 @@ static void emit_trunc( struct brw_wm_compile *c,
}
static void emit_mov( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -546,7 +686,9 @@ static void emit_mov( struct brw_wm_compile *c,
if (mask & (1<<i)) {
struct brw_reg src, dst;
dst = get_dst_reg(c, inst, i);
- src = get_src_reg_imm(c, inst, 0, i);
+ /* XXX some moves from immediate value don't work reliably!!! */
+ /*src = get_src_reg_imm(c, inst, 0, i);*/
+ src = get_src_reg(c, inst, 0, i);
brw_MOV(p, dst, src);
}
}
@@ -554,7 +696,7 @@ static void emit_mov( struct brw_wm_compile *c,
}
static void emit_pixel_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -584,7 +726,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
}
static void emit_delta_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg dst0, dst1, src0, src1;
@@ -644,7 +786,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
}
static void emit_fb_write(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
int nr = 2;
@@ -682,27 +824,26 @@ static void emit_fb_write(struct brw_wm_compile *c,
}
if (c->key.dest_depth_reg) {
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
- assert(comp == 1);
- assert(off == 0);
-#if 0
- /* XXX do we need this code? comp always 1, off always 0, it seems */
if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
/* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
brw_pop_insn_state(p);
}
else
-#endif
{
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
}
nr += 2;
}
@@ -713,7 +854,7 @@ static void emit_fb_write(struct brw_wm_compile *c,
}
static void emit_pixel_w( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -743,7 +884,7 @@ static void emit_pixel_w( struct brw_wm_compile *c,
}
static void emit_linterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -772,7 +913,7 @@ static void emit_linterp(struct brw_wm_compile *c,
}
static void emit_cinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -798,7 +939,7 @@ static void emit_cinterp(struct brw_wm_compile *c,
}
static void emit_pinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -832,7 +973,7 @@ static void emit_pinterp(struct brw_wm_compile *c,
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
static void emit_frontfacing(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
@@ -861,7 +1002,7 @@ static void emit_frontfacing(struct brw_wm_compile *c,
}
static void emit_xpd(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -886,7 +1027,7 @@ static void emit_xpd(struct brw_wm_compile *c,
}
static void emit_dp3(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[3], src1[3], dst;
int i;
@@ -905,7 +1046,7 @@ static void emit_dp3(struct brw_wm_compile *c,
}
static void emit_dp4(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
@@ -924,7 +1065,7 @@ static void emit_dp4(struct brw_wm_compile *c,
}
static void emit_dph(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
@@ -948,7 +1089,7 @@ static void emit_dph(struct brw_wm_compile *c,
* register's X, Y, Z and W channels (subject to writemasking of course).
*/
static void emit_math1(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint func)
+ const struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst, tmp;
@@ -985,43 +1126,43 @@ static void emit_math1(struct brw_wm_compile *c,
}
static void emit_rcp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
}
static void emit_rsq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
}
static void emit_sin(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
}
static void emit_cos(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
}
static void emit_ex2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
}
static void emit_lg2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
}
static void emit_add(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -1040,7 +1181,7 @@ static void emit_add(struct brw_wm_compile *c,
}
static void emit_arl(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, addr_reg;
@@ -1053,7 +1194,7 @@ static void emit_arl(struct brw_wm_compile *c,
}
static void emit_sub(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -1072,7 +1213,7 @@ static void emit_sub(struct brw_wm_compile *c,
}
static void emit_mul(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -1091,7 +1232,7 @@ static void emit_mul(struct brw_wm_compile *c,
}
static void emit_frc(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -1110,7 +1251,7 @@ static void emit_frc(struct brw_wm_compile *c,
}
static void emit_flr(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -1177,7 +1318,7 @@ static void emit_min_max(struct brw_wm_compile *c,
}
static void emit_pow(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst, src0, src1;
@@ -1199,7 +1340,7 @@ static void emit_pow(struct brw_wm_compile *c,
}
static void emit_lrp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1252,7 +1393,7 @@ static void emit_kil(struct brw_wm_compile *c)
}
static void emit_mad(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1275,7 +1416,7 @@ static void emit_mad(struct brw_wm_compile *c,
}
static void emit_sop(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint cond)
+ const struct prog_instruction *inst, GLuint cond)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1299,43 +1440,43 @@ static void emit_sop(struct brw_wm_compile *c,
}
static void emit_slt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_L);
}
static void emit_sle(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_LE);
}
static void emit_sgt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_G);
}
static void emit_sge(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_GE);
}
static void emit_seq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_EQ);
}
static void emit_sne(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
}
static void emit_ddx(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1362,7 +1503,7 @@ static void emit_ddx(struct brw_wm_compile *c,
}
static void emit_ddy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1505,7 +1646,7 @@ static void noise1_sub( struct brw_wm_compile *c ) {
}
static void emit_noise1( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src, param, dst;
@@ -1675,7 +1816,7 @@ static void noise2_sub( struct brw_wm_compile *c ) {
}
static void emit_noise2( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, param0, param1, dst;
@@ -1978,7 +2119,7 @@ static void noise3_sub( struct brw_wm_compile *c ) {
}
static void emit_noise3( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, param0, param1, param2, dst;
@@ -2401,7 +2542,7 @@ static void noise4_sub( struct brw_wm_compile *c )
}
static void emit_noise4( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
@@ -2443,7 +2584,7 @@ static void emit_noise4( struct brw_wm_compile *c,
}
static void emit_wpos_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -2479,7 +2620,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
BIAS on SIMD8 not working yet...
*/
static void emit_txb(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
@@ -2517,7 +2658,7 @@ static void emit_txb(struct brw_wm_compile *c,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */
@@ -2528,7 +2669,7 @@ static void emit_txb(struct brw_wm_compile *c,
static void emit_tex(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
@@ -2581,7 +2722,7 @@ static void emit_tex(struct brw_wm_compile *c,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */
@@ -2612,28 +2753,31 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
- c->reg_index = 0;
+ c->out_of_regs = GL_FALSE;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
for (i = 0; i < c->nr_fp_insns; i++) {
- struct prog_instruction *inst = &c->prog_instructions[i];
+ const struct prog_instruction *inst = &c->prog_instructions[i];
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ c->cur_inst = i;
- /*
+#if 0
_mesa_printf("Inst %d: ", i);
_mesa_print_instruction(inst);
- */
+#endif
/* fetch any constants that this instruction needs */
- if (c->use_const_buffer)
+ if (c->fp->use_const_buffer)
fetch_constants(c, inst);
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, inst);
@@ -2819,6 +2963,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
break;
case OPCODE_BGNLOOP:
+ /* XXX may need to invalidate the current_constant regs */
loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
@@ -2849,17 +2994,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
-
- if (c->reg_index >= BRW_WM_MAX_GRF) {
- _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
- /* XXX we need to do some proper error recovery here */
- }
}
@@ -2883,6 +3024,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_print_program(c, "brw_wm_glsl_emit done");
}
- c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_grf = num_grf_used(c);
c->prog_data.total_scratch = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index bd60ac9b31..8fd067abe7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -116,6 +116,10 @@ const struct {
{ C, 0, 1, 1, 1 }
};
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags
+ */
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
struct brw_wm_prog_key *key )
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 2debd0678a..92142764f5 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -322,7 +322,7 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
newref->value->lastuse = newref;
}
- if (src.NegateBase & (1<<i))
+ if (src.Negate & (1 << i))
newref->hw_reg.negate ^= 1;
if (src.Abs) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index ab9aa2f10d..3436a24717 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_FRC:
case OPCODE_MOV:
case OPCODE_SWZ:
+ case OPCODE_TRUNC:
read0 = writemask;
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 780edbc42e..6faea018fb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c,
*/
static void init_registers( struct brw_wm_compile *c )
{
- struct brw_context *brw = c->func.brw;
- GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS);
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
@@ -85,16 +83,19 @@ static void init_registers( struct brw_wm_compile *c )
prealloc_reg(c, &c->creg[j], i++);
for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
- if (inputs & (1<<j)) {
- /* index for vs output and ps input are not the same
- in shader varying */
- GLuint index;
- if (j > FRAG_ATTRIB_VAR0)
- index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ if (c->key.vp_outputs_written & (1<<j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
else
- index = j;
+ fp_index = -1;
+
nr_interp_regs++;
- prealloc_reg(c, &c->payload.input_interp[index], i++);
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 1fc9f01372..3fc18ff1f3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -152,7 +152,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
if (key->max_aniso > 2.0) {
- sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2,
+ sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
BRW_ANISORATIO_16);
}
}
@@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
}
+ else if (key->tex_target == GL_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
else {
sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 58fa6aaf8f..67b41173fb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_SURFACES),
+ BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index e7d55d5dbd..c49a5f6b4e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -38,7 +38,7 @@
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
-
+#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
@@ -176,17 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
}
}
-struct brw_wm_surface_key {
- GLenum target, depthmode;
- dri_bo *bo;
- GLint format, internal_format;
- GLint first_level, last_level;
- GLint width, height, depth;
- GLint pitch, cpp;
- uint32_t tiling;
- GLuint offset;
-};
-
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@@ -208,7 +197,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
- struct brw_wm_surface_key *key )
+ struct brw_surface_key *key )
{
struct brw_surface_state surf;
dri_bo *bo;
@@ -263,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.cube_neg_z = 1;
}
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -287,8 +276,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
- struct brw_wm_surface_key key;
- const GLuint j = MAX_DRAW_BUFFERS + unit;
+ struct brw_surface_key key;
+ const GLuint surf = SURF_INDEX_TEXTURE(unit);
memset(&key, 0, sizeof(key));
@@ -315,25 +304,26 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.cpp = intelObj->mt->cpp;
key.tiling = intelObj->mt->region->tiling;
- dri_bo_unreference(brw->wm.surf_bo[j]);
- brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->wm.surf_bo[j] == NULL) {
- brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key);
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
}
/**
- * Create the constant buffer surface. Fragment shader constanst will be
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
-static dri_bo *
+dri_bo *
brw_create_constant_surface( struct brw_context *brw,
- struct brw_wm_surface_key *key )
+ struct brw_surface_key *key )
{
const GLint w = key->width - 1;
struct brw_surface_state surf;
@@ -345,8 +335,6 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
- /* This is ok for all textures with channel width 8bit or less:
- */
assert(key->bo);
if (key->bo)
surf.ss1.base_addr = key->bo->offset; /* reloc */
@@ -356,10 +344,10 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
- surf.ss3.pitch = (key->pitch * key->cpp) - 1;
- brw_set_surface_tiling(&surf, key->tiling);
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -377,28 +365,73 @@ brw_create_constant_surface( struct brw_context *brw,
return bo;
}
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!fp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
/**
- * Update the constant buffer surface.
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
*/
static void
-brw_update_constant_surface( GLcontext *ctx,
- const struct brw_fragment_program *fp )
+brw_update_wm_constant_surface( GLcontext *ctx,
+ GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
- struct brw_wm_surface_key key;
- const GLuint j = BRW_WM_MAX_SURF - 1;
- const GLuint numParams = fp->program.Base.Parameters->NumParameters;
+ struct brw_surface_key key;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params =
+ fp->program.Base.Parameters;
+
+ /* If we're in this state update atom, we need to update WM constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ return;
+ }
memset(&key, 0, sizeof(key));
key.format = MESA_FORMAT_RGBA_FLOAT32;
key.internal_format = GL_RGBA;
key.bo = fp->const_buffer;
-
key.depthmode = GL_NONE;
- key.pitch = numParams;
- key.width = numParams;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
key.height = 1;
key.depth = 1;
key.cpp = 16;
@@ -409,16 +442,60 @@ brw_update_constant_surface( GLcontext *ctx,
key.width, key.height, key.depth, key.cpp, key.pitch);
*/
- dri_bo_unreference(brw->wm.surf_bo[j]);
- brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->wm.surf_bo[j] == NULL) {
- brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key);
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
}
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static void prepare_wm_constant_surface(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+ drm_intel_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ if (brw->wm.surf_bo[surf] != NULL) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ }
+ return;
+ }
+
+ brw_update_wm_constant_surface(ctx, surf);
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
+
/**
* Sets up a surface state structure to point at the given region.
@@ -426,18 +503,22 @@ brw_update_constant_surface( GLcontext *ctx,
* usable for further buffers when doing ARB_draw_buffer support.
*/
static void
-brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
- unsigned int unit, GLboolean cached)
+brw_update_renderbuffer_surface(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ unsigned int unit)
{
GLcontext *ctx = &brw->intel.ctx;
dri_bo *region_bo = NULL;
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_region *region = irb ? irb->region : NULL;
struct {
unsigned int surface_type;
unsigned int surface_format;
- unsigned int width, height, cpp;
+ unsigned int width, height, pitch, cpp;
GLubyte color_mask[4];
GLboolean color_blend;
uint32_t tiling;
+ uint32_t draw_offset;
} key;
memset(&key, 0, sizeof(key));
@@ -446,14 +527,29 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
region_bo = region->buffer;
key.surface_type = BRW_SURFACE_2D;
- if (region->cpp == 4)
+ switch (irb->texformat->MesaFormat) {
+ case MESA_FORMAT_ARGB8888:
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- else
+ break;
+ case MESA_FORMAT_RGB565:
key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+ break;
+ default:
+ _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+ irb->texformat->MesaFormat);
+ }
key.tiling = region->tiling;
- key.width = region->pitch; /* XXX: not really! */
+ key.width = region->width;
key.height = region->height;
+ key.pitch = region->pitch;
key.cpp = region->cpp;
+ key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
@@ -461,6 +557,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
key.width = 1;
key.height = 1;
key.cpp = 4;
+ key.draw_offset = 0;
}
memcpy(key.color_mask, ctx->Color.ColorMask,
sizeof(key.color_mask));
@@ -468,12 +565,11 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
ctx->Color.BlendEnabled);
dri_bo_unreference(brw->wm.surf_bo[unit]);
- brw->wm.surf_bo[unit] = NULL;
- if (cached)
- brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &region_bo, 1,
- NULL);
+ brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &region_bo, 1,
+ NULL);
if (brw->wm.surf_bo[unit] == NULL) {
struct brw_surface_state surf;
@@ -482,13 +578,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
+ surf.ss1.base_addr = key.draw_offset;
if (region_bo != NULL)
- surf.ss1.base_addr = region_bo->offset; /* reloc */
+ surf.ss1.base_addr += region_bo->offset; /* reloc */
surf.ss2.width = key.width - 1;
surf.ss2.height = key.height - 1;
brw_set_surface_tiling(&surf, key.tiling);
- surf.ss3.pitch = (key.width * key.cpp) - 1;
+ surf.ss3.pitch = (key.pitch * key.cpp) - 1;
/* _NEW_COLOR */
surf.ss0.color_blend = key.color_blend;
@@ -498,8 +595,9 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
surf.ss0.writedisable_alpha = !key.color_mask[3];
/* Key size will never match key size for textures, so we're safe. */
- brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
+ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
NULL, NULL);
@@ -508,12 +606,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
* them both. We might be able to figure out from other state
* a more restrictive relocation to emit.
*/
- dri_bo_emit_reloc(brw->wm.surf_bo[unit],
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct brw_surface_state, ss1),
- region_bo);
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+ offsetof(struct brw_surface_state, ss1),
+ region_bo,
+ key.draw_offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
}
}
}
@@ -528,7 +626,9 @@ brw_wm_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
- bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
NULL);
@@ -544,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
else
data[i] = 0;
- bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
data, data_size,
@@ -574,66 +674,62 @@ static void prepare_wm_surfaces(struct brw_context *brw )
GLuint i;
int old_nr_surfaces;
+ /* _NEW_BUFFERS */
/* Update surfaces for drawing buffers */
- if (brw->state.nr_color_regions > 1) {
- for (i = 0; i < brw->state.nr_color_regions; i++) {
- brw_update_region_surface(brw, brw->state.color_regions[i], i,
- GL_FALSE);
+ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ brw_update_renderbuffer_surface(brw,
+ ctx->DrawBuffer->_ColorDrawBuffers[i],
+ i);
}
} else {
- brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE);
+ brw_update_renderbuffer_surface(brw, NULL, 0);
}
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+ if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+ brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+
/* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
- const GLuint j = MAX_DRAW_BUFFERS + i;
+ const GLuint surf = SURF_INDEX_TEXTURE(i);
/* _NEW_TEXTURE, BRW_NEW_TEXDATA */
if (texUnit->_ReallyEnabled) {
if (texUnit->_Current == intel->frame_buffer_texobj) {
/* render to texture */
- dri_bo_unreference(brw->wm.surf_bo[j]);
- brw->wm.surf_bo[j] = brw->wm.surf_bo[0];
- dri_bo_reference(brw->wm.surf_bo[j]);
- brw->wm.nr_surfaces = j + 1;
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw->wm.surf_bo[0];
+ dri_bo_reference(brw->wm.surf_bo[surf]);
+ brw->wm.nr_surfaces = surf + 1;
} else {
/* regular texture */
brw_update_texture_surface(ctx, i);
- brw->wm.nr_surfaces = j + 1;
+ brw->wm.nr_surfaces = surf + 1;
}
} else {
- dri_bo_unreference(brw->wm.surf_bo[j]);
- brw->wm.surf_bo[j] = NULL;
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
}
}
- /* Update surface for fragment shader constant buffer */
- {
- const GLuint j = BRW_WM_MAX_SURF - 1;
- const struct brw_fragment_program *fp =
- brw_fragment_program_const(brw->fragment_program);
-
- brw_update_constant_surface(ctx, fp);
- brw->wm.nr_surfaces = j + 1;
- }
-
-
dri_bo_unreference(brw->wm.bind_bo);
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
if (brw->wm.nr_surfaces != old_nr_surfaces)
- brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
-
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = (_NEW_COLOR |
+ _NEW_TEXTURE |
+ _NEW_BUFFERS),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
.cache = 0
},
.prepare = prepare_wm_surfaces,
diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
new file mode 120000
index 0000000000..4c6b37ada0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
@@ -0,0 +1 @@
+../intel/intel_generatemipmap.c \ No newline at end of file