summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_line.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_point.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_unfilled.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h13
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c77
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h29
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c49
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c294
-rw-r--r--src/mesa/drivers/dri/i965/brw_fallback.c31
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c44
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_emit.c41
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h25
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c159
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c51
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex_layout.c146
-rw-r--r--src/mesa/drivers/dri/i965/brw_urb.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c179
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c226
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c53
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c26
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c347
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c342
l---------src/mesa/drivers/dri/i965/intel_generatemipmap.c1
l---------src/mesa/drivers/dri/i965/intel_pixel_read.c1
49 files changed, 1835 insertions, 720 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 2934414d99..00a42111da 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -14,6 +14,7 @@ DRIVER_SOURCES = \
intel_decode.c \
intel_extensions.c \
intel_fbo.c \
+ intel_generatemipmap.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_screen.c \
@@ -22,6 +23,7 @@ DRIVER_SOURCES = \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
+ intel_pixel_read.c \
intel_state.c \
intel_swapbuffers.c \
intel_tex.c \
@@ -69,6 +71,7 @@ DRIVER_SOURCES = \
brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
+ brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_debug.c \
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 5cffcebde4..54d30a3f42 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -65,21 +65,31 @@ static void compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
c.key = *key;
-
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++)
+ if (BRW_IS_IGDNG(brw))
+ delta = 3 * REG_SIZE;
+ else
+ delta = REG_SIZE;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
if (c.key.attrs & (1<<i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
}
c.nr_attrs = brw_count_bits(c.key.attrs);
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
c.nr_bytes = c.nr_regs * REG_SIZE;
c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
@@ -148,7 +158,11 @@ static void upload_clip_prog(struct brw_context *brw)
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
- key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+ if (BRW_IS_IGDNG(brw))
+ key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+ else
+ key.clip_mode = BRW_CLIPMODE_NORMAL;
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index e06747864b..12e8548df1 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -117,6 +117,7 @@ struct brw_clip_compile {
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
+ GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
@@ -171,5 +172,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c );
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
-
+void brw_clip_ff_sync(struct brw_clip_compile *c);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index d830e49e50..9abd0642aa 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -130,7 +130,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
struct brw_instruction *plane_loop;
struct brw_instruction *plane_active;
struct brw_instruction *is_negative;
- struct brw_instruction *is_neg2;
+ struct brw_instruction *is_neg2 = NULL;
struct brw_instruction *not_culled;
struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
@@ -148,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -185,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -210,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -225,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_ENDIF(p, is_neg2);
}
}
@@ -246,6 +246,8 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ if (c->need_ff_sync)
+ brw_clip_ff_sync(c);
not_culled = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
index d17b199b89..9738299168 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_point.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -50,5 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c )
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
+ if (c->need_ff_sync)
+ brw_clip_ff_sync(c);
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 9b0d7eab7b..5762c9577c 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -95,7 +95,14 @@ clip_unit_create_from_key(struct brw_context *brw,
* even number.
*/
assert(key->nr_urb_entries % 2 == 0);
- clip.thread4.max_threads = 2 - 1;
+
+ /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+ * only 2 threads can output VUEs at a time.
+ */
+ if (BRW_IS_IGDNG(brw))
+ clip.thread4.max_threads = 16 - 1;
+ else
+ clip.thread4.max_threads = 2 - 1;
} else {
assert(key->nr_urb_entries >= 5);
clip.thread4.max_threads = 1 - 1;
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 7fd37bd05f..4c2d655fb1 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -77,6 +77,10 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
if (c->nr_attrs & 1) {
for (j = 0; j < 3; j++) {
GLuint delta = c->nr_attrs*16 + 32;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ delta = c->nr_attrs * 16 + 32 * 3;
+
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
}
@@ -562,7 +566,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -579,11 +583,14 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
- if (c->key.clip_mode == BRW_CLIPMODE_NORMAL)
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
do_clip_tri(c);
else
maybe_do_clip_tri(c);
-
+
+ if (c->need_ff_sync)
+ brw_clip_ff_sync(c);
brw_clip_tri_emit_polygon(c);
/* Send an empty message to kill the thread:
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index d7ca517927..26950383c1 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -496,6 +496,8 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
}
brw_ENDIF(p, do_clip);
+ if (c->need_ff_sync)
+ brw_clip_ff_sync(c);
emit_unfilled_primitives(c);
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 9d3b0be694..e09efc07ed 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -140,6 +140,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
/* Just copy the vertex header:
*/
+ /*
+ * After CLIP stage, only first 256 bits of the VUE are read
+ * back on IGDNG, so needn't change it
+ */
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
/* Iterate over each attribute (could be done in pairs?)
@@ -147,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->nr_attrs; i++) {
GLuint delta = i*16 + 32;
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
if (delta == c->offset[VERT_RESULT_EDGE]) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
@@ -177,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
+
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
@@ -343,3 +354,19 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
}
}
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 577497bf6b..57ddf75413 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -129,8 +129,8 @@ struct brw_context;
#define BRW_NEW_PRIMITIVE 0x40
#define BRW_NEW_CONTEXT 0x80
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
-#define BRW_NEW_INPUT_VARYING 0x200
#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
@@ -401,7 +401,6 @@ struct brw_vertex_element {
struct brw_vertex_info {
- GLuint varying; /* varying:1[VERT_ATTRIB_MAX] */
GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
};
@@ -450,8 +449,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
- struct brw_tracked_state **atoms;
- GLuint nr_atoms;
GLuint nr_color_regions;
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
@@ -471,7 +468,8 @@ struct brw_context
int validated_bo_count;
} state;
- struct brw_cache cache;
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
struct brw_cached_batch_item *cached_batch_items;
struct {
@@ -555,11 +553,6 @@ struct brw_context
GLuint vs_size;
GLuint total_size;
- /* Dynamic tracker which changes to reflect the state referenced
- * by active fp and vp program parameters:
- */
- struct brw_tracked_state tracked_state;
-
dri_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 9197fede2d..a1a6c53d0e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -36,6 +36,7 @@
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
@@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLfloat *buf;
GLuint i;
- /* Update our own dependency flags. This works because this
- * function will also be called whenever fp or vp changes.
- */
- brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
- brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
-
if (sz == 0) {
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
@@ -335,78 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
*/
}
-
-/**
- * Copy Mesa program parameters into given constant buffer.
- */
-static void
-update_constant_buffer(struct brw_context *brw,
- const struct gl_program_parameter_list *params,
- dri_bo *const_buffer)
-{
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- /* copy Mesa program constants into the buffer */
- if (const_buffer && size > 0) {
-
- assert(const_buffer);
- assert(const_buffer->size >= size);
-
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(const_buffer);
- memcpy(const_buffer->virtual, params->ParameterValues, size);
- drm_intel_gem_bo_unmap_gtt(const_buffer);
- }
- else {
- dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
- }
-
- if (0) {
- int i;
- for (i = 0; i < params->NumParameters; i++) {
- float *p = params->ParameterValues[i];
- printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
- }
- }
- }
-}
-
-
-/** Copy current vertex program's parameters into the constant buffer */
-static void
-update_vertex_constant_buffer(struct brw_context *brw)
-{
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *) brw->vertex_program;
- if (0) {
- printf("update VS constants in buffer %p\n", vp->const_buffer);
- printf("program %u\n", vp->program.Base.Id);
- }
- if (vp->use_const_buffer)
- update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer);
-}
-
-
-/** Copy current fragment program's parameters into the constant buffer */
-static void
-update_fragment_constant_buffer(struct brw_context *brw)
-{
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- if (fp->use_const_buffer)
- update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
-}
-
-
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- update_vertex_constant_buffer(brw);
- update_fragment_constant_buffer(brw);
-
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
@@ -428,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
- .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
+ .mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 98fc909c2a..d166250b4f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -139,6 +139,7 @@
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
#define BRW_CLIPMODE_REJECT_ALL 3
#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
#define BRW_CLIP_NDCSPACE 0
#define BRW_CLIP_SCREENSPACE 1
@@ -670,6 +671,25 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
@@ -819,8 +839,11 @@
#include "intel_chipset.h"
#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */
+#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
+#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 5342622a73..5152c3f3a5 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -187,19 +187,13 @@ static void brw_merge_inputs( struct brw_context *brw,
brw->vb.inputs[i].glarray = arrays[i];
if (arrays[i]->StrideB != 0)
- brw->vb.info.varying |= 1 << i;
-
brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
((i%16) * 2);
}
- /* Raise statechanges if input sizes and varying have changed:
- */
+ /* Raise statechanges if input sizes have changed. */
if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
-
- if (brw->vb.info.varying != old.varying)
- brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING;
}
/* XXX: could split the primitive list to fallback only on the
@@ -416,6 +410,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
out:
UNLOCK_HARDWARE(intel);
+ brw_state_cache_check_size(brw);
+
if (warn)
fprintf(stderr, "i965: Single primitive emit potentially exceeded "
"available aperture space\n");
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index b91b20bec6..c29f1dd5c0 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
@@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- GLuint i = _mesa_ffsll(tmp)-1;
+ while (vs_inputs) {
+ GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
+ vs_inputs &= ~(1 << i);
enabled[nr_enabled++] = input;
}
@@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint i;
GLuint nr_enabled = 0;
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- i = _mesa_ffsll(tmp)-1;
+ while (vs_inputs) {
+ i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
+ vs_inputs &= ~(1 << i);
enabled[nr_enabled++] = input;
}
@@ -512,7 +512,19 @@ static void brw_emit_vertices(struct brw_context *brw)
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
- OUT_BATCH(brw->vb.max_index);
+ if (BRW_IS_IGDNG(brw)) {
+ if (input->stride) {
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->stride * input->count);
+ } else {
+ assert(input->count == 1);
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->element_size);
+ }
+ } else
+ OUT_BATCH(brw->vb.max_index);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
@@ -542,11 +554,18 @@ static void brw_emit_vertices(struct brw_context *brw)
BRW_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
- OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
- (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
- (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
- (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
- ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+
+ if (BRW_IS_IGDNG(brw))
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+ ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
ADVANCE_BATCH();
}
@@ -635,7 +654,7 @@ static void brw_emit_indices(struct brw_context *brw)
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count;
+ ib_size = get_size(index_buffer->type) * index_buffer->count - 1;
/* Emit the indexbuffer packet:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 003332f78c..30603bdd0e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -97,7 +97,7 @@ struct brw_glsl_call;
#define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 4000
+#define BRW_EU_MAX_INSN 10000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
@@ -171,9 +171,9 @@ static INLINE struct brw_reg brw_reg( GLuint file,
{
struct brw_reg reg;
if (type == BRW_GENERAL_REGISTER_FILE)
- assert(nr < 128);
+ assert(nr < BRW_MAX_GRF);
else if (type == BRW_MESSAGE_REGISTER_FILE)
- assert(nr < 9);
+ assert(nr < BRW_MAX_MRF);
else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
@@ -538,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
+ assert(nr < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
@@ -815,6 +816,19 @@ void brw_urb_WRITE(struct brw_compile *p,
GLuint offset,
GLuint swizzle);
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -834,7 +848,9 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot);
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
void brw_math_16( struct brw_compile *p,
struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 2a147fb8c3..2412014248 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -241,7 +241,8 @@ void brw_set_src1( struct brw_instruction *insn,
-static void brw_set_math_message( struct brw_instruction *insn,
+static void brw_set_math_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint msg_length,
GLuint response_length,
GLuint function,
@@ -252,18 +253,35 @@ static void brw_set_math_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.math.function = function;
- insn->bits3.math.int_type = integer_type;
- insn->bits3.math.precision = low_precision;
- insn->bits3.math.saturate = saturate;
- insn->bits3.math.data_type = dataType;
- insn->bits3.math.response_length = response_length;
- insn->bits3.math.msg_length = msg_length;
- insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
- insn->bits3.math.end_of_thread = 0;
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.math_igdng.function = function;
+ insn->bits3.math_igdng.int_type = integer_type;
+ insn->bits3.math_igdng.precision = low_precision;
+ insn->bits3.math_igdng.saturate = saturate;
+ insn->bits3.math_igdng.data_type = dataType;
+ insn->bits3.math_igdng.snapshot = 0;
+ insn->bits3.math_igdng.header_present = 0;
+ insn->bits3.math_igdng.response_length = response_length;
+ insn->bits3.math_igdng.msg_length = msg_length;
+ insn->bits3.math_igdng.end_of_thread = 0;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_igdng.end_of_thread = 0;
+ } else {
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+ }
}
-static void brw_set_urb_message( struct brw_instruction *insn,
+
+static void brw_set_ff_sync_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLboolean allocate,
GLboolean used,
GLuint msg_length,
@@ -273,21 +291,64 @@ static void brw_set_urb_message( struct brw_instruction *insn,
GLuint offset,
GLuint swizzle_control )
{
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb_igdng.opcode = 1;
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used;
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
- insn->bits3.urb.opcode = 0; /* ? */
- insn->bits3.urb.offset = offset;
- insn->bits3.urb.swizzle_control = swizzle_control;
- insn->bits3.urb.allocate = allocate;
- insn->bits3.urb.used = used; /* ? */
- insn->bits3.urb.complete = complete;
- insn->bits3.urb.response_length = response_length;
- insn->bits3.urb.msg_length = msg_length;
- insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
- insn->bits3.urb.end_of_thread = end_of_thread;
+static void brw_set_urb_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.urb_igdng.opcode = 0; /* ? */
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used; /* ? */
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+ }
}
-static void brw_set_dp_write_message( struct brw_instruction *insn,
+static void brw_set_dp_write_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
@@ -298,18 +359,33 @@ static void brw_set_dp_write_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.dp_write.binding_table_index = binding_table_index;
- insn->bits3.dp_write.msg_control = msg_control;
- insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_write.msg_type = msg_type;
- insn->bits3.dp_write.send_commit_msg = 0;
- insn->bits3.dp_write.response_length = response_length;
- insn->bits3.dp_write.msg_length = msg_length;
- insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits3.urb.end_of_thread = end_of_thread;
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_igdng.msg_control = msg_control;
+ insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_igdng.msg_type = msg_type;
+ insn->bits3.dp_write_igdng.send_commit_msg = 0;
+ insn->bits3.dp_write_igdng.header_present = 1;
+ insn->bits3.dp_write_igdng.response_length = response_length;
+ insn->bits3.dp_write_igdng.msg_length = msg_length;
+ insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.dp_write.end_of_thread = end_of_thread;
+ }
}
-static void brw_set_dp_read_message( struct brw_instruction *insn,
+static void brw_set_dp_read_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
@@ -320,15 +396,29 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
- insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
- insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
- insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
- insn->bits3.dp_read.response_length = response_length; /*16:19*/
- insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
- insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
- insn->bits3.dp_read.pad1 = 0; /*28:30*/
- insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_igdng.msg_control = msg_control;
+ insn->bits3.dp_read_igdng.msg_type = msg_type;
+ insn->bits3.dp_read_igdng.target_cache = target_cache;
+ insn->bits3.dp_read_igdng.header_present = 1;
+ insn->bits3.dp_read_igdng.response_length = response_length;
+ insn->bits3.dp_read_igdng.msg_length = msg_length;
+ insn->bits3.dp_read_igdng.pad1 = 0;
+ insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+ insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+ insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read.pad1 = 0; /*28:30*/
+ insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ }
}
static void brw_set_sampler_message(struct brw_context *brw,
@@ -338,11 +428,25 @@ static void brw_set_sampler_message(struct brw_context *brw,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot)
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
{
+ assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_G4X(brw)) {
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+ insn->bits3.sampler_igdng.sampler = sampler;
+ insn->bits3.sampler_igdng.msg_type = msg_type;
+ insn->bits3.sampler_igdng.simd_mode = simd_mode;
+ insn->bits3.sampler_igdng.header_present = header_present;
+ insn->bits3.sampler_igdng.response_length = response_length;
+ insn->bits3.sampler_igdng.msg_length = msg_length;
+ insn->bits3.sampler_igdng.end_of_thread = eot;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_igdng.end_of_thread = eot;
+ } else if (BRW_IS_G4X(brw)) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -484,6 +588,10 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+ insn->header.execution_size = 1;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
@@ -540,6 +648,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *if_insn)
{
struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -566,7 +678,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
} else {
assert(if_insn->header.opcode == BRW_OPCODE_IF);
- if_insn->bits3.if_else.jump_count = insn - if_insn;
+ if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
if_insn->bits3.if_else.pop_count = 1;
if_insn->bits3.if_else.pad0 = 0;
}
@@ -577,6 +689,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *patch_insn)
{
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
if (p->single_program_flow) {
/* In single program flow mode, there's no need to execute an ENDIF,
* since we don't need to do any stack operations, and if we're executing
@@ -608,11 +725,11 @@ void brw_ENDIF(struct brw_compile *p,
/* Automagically turn it into an IFF:
*/
patch_insn->header.opcode = BRW_OPCODE_IFF;
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
patch_insn->bits3.if_else.pop_count = 0;
patch_insn->bits3.if_else.pad0 = 0;
} else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
patch_insn->bits3.if_else.pop_count = 1;
patch_insn->bits3.if_else.pad0 = 0;
} else {
@@ -686,6 +803,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
if (p->single_program_flow)
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -706,7 +827,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
insn->header.execution_size = do_insn->header.execution_size;
assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = do_insn - insn + 1;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
insn->bits3.if_else.pop_count = 0;
insn->bits3.if_else.pad0 = 0;
}
@@ -725,11 +846,15 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn)
{
struct brw_instruction *landing = &p->store[p->nr_insn];
+ GLuint jmpi = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
- jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+ jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
@@ -794,7 +919,8 @@ void brw_math( struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -830,7 +956,8 @@ void brw_math_16( struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -846,7 +973,8 @@ void brw_math_16( struct brw_compile *p,
brw_set_dest(insn, offset(dest,1));
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -893,7 +1021,8 @@ void brw_dp_WRITE_16( struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_dp_write_message(insn,
+ brw_set_dp_write_message(p->brw,
+ insn,
255, /* binding table index (255=stateless) */
BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
@@ -938,7 +1067,8 @@ void brw_dp_READ_16( struct brw_compile *p,
brw_set_dest(insn, dest); /* UW? */
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
255, /* binding table index (255=stateless) */
3, /* msg_control (3 means 4 Owords) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -995,7 +1125,8 @@ void brw_dp_READ_4( struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, brw_null_reg());
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
bind_table_index,
0, /* msg_control (0 means 1 Oword) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -1066,7 +1197,8 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, brw_null_reg());
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
bind_table_index,
oword, /* 0 = lower Oword, 1 = upper Oword */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -1096,7 +1228,8 @@ void brw_fb_WRITE(struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
- brw_set_dp_write_message(insn,
+ brw_set_dp_write_message(p->brw,
+ insn,
binding_table_index,
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
@@ -1122,7 +1255,9 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot)
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
{
GLboolean need_stall = 0;
@@ -1197,7 +1332,9 @@ void brw_SAMPLE(struct brw_compile *p,
msg_type,
response_length,
msg_length,
- eot);
+ eot,
+ header_present,
+ simd_mode);
}
if (need_stall) {
@@ -1232,7 +1369,7 @@ void brw_urb_WRITE(struct brw_compile *p,
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < 16);
+ assert(msg_length < BRW_MAX_MRF);
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1240,7 +1377,8 @@ void brw_urb_WRITE(struct brw_compile *p,
insn->header.destreg__conditonalmod = msg_reg_nr;
- brw_set_urb_message(insn,
+ brw_set_urb_message(p->brw,
+ insn,
allocate,
used,
msg_length,
@@ -1251,3 +1389,37 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index 299357409c..d27c6c24ca 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -37,6 +37,9 @@
#include "tnl/tnl.h"
#include "brw_context.h"
#include "brw_fallback.h"
+#include "intel_chipset.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
#include "glapi/glapi.h"
@@ -44,6 +47,7 @@
static GLboolean do_check_fallback(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
@@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw)
return GL_TRUE;
}
+ /* _NEW_BUFFERS */
+ if (IS_965(intel->intelScreen->deviceID) &&
+ !IS_G4X(intel->intelScreen->deviceID)) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ /* The original gen4 hardware couldn't set up WM surfaces pointing
+ * at an offset within a tile, which can happen when rendering to
+ * anything but the base level of a texture or the +X face/0 depth.
+ * This was fixed with the 4 Series hardware.
+ *
+ * For these original chips, you would have to make the depth and
+ * color destination surfaces include information on the texture
+ * type, LOD, face, and various limits to use them as a destination.
+ * I would have done this, but there's also a nasty requirement that
+ * the depth and the color surfaces all be of the same LOD, which
+ * may be a worse requirement than this alignment. (Also, we may
+ * want to just demote the texture to untiled, instead).
+ */
+ if (irb->region && irb->region->tiling != I915_TILING_NONE &&
+ (irb->region->draw_offset & 4095)) {
+ DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
+ return GL_TRUE;
+ }
+ }
+ }
return GL_FALSE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index a8b74a0afe..48c2b9a41c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -54,12 +54,17 @@ static void compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
-
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = brw_count_bits(c.key.attrs);
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
c.nr_bytes = c.nr_regs * REG_SIZE;
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index 18a4537c32..bbb991ea2e 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -62,6 +62,7 @@ struct brw_gs_compile {
GLuint nr_attrs;
GLuint nr_regs;
GLuint nr_bytes;
+ GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 22e0d25c2e..980eac7646 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -101,6 +101,23 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
BRW_URB_SWIZZLE_NONE);
}
+void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+ struct brw_compile *p = &c->func;
+ brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
void brw_gs_quads( struct brw_gs_compile *c )
@@ -110,6 +127,8 @@ void brw_gs_quads( struct brw_gs_compile *c )
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
*/
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
@@ -120,6 +139,8 @@ void brw_gs_quad_strip( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 4);
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
@@ -129,6 +150,9 @@ void brw_gs_quad_strip( struct brw_gs_compile *c )
void brw_gs_tris( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 3);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
@@ -137,6 +161,9 @@ void brw_gs_tris( struct brw_gs_compile *c )
void brw_gs_lines( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 2);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
}
@@ -144,6 +171,9 @@ void brw_gs_lines( struct brw_gs_compile *c )
void brw_gs_points( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 1);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index 27023cf034..a761c03153 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -95,6 +95,9 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
gs.thread4.max_threads = 0; /* Hardware requirement */
+ if (BRW_IS_IGDNG(brw))
+ gs.thread4.rendering_enable = 1;
+
if (INTEL_DEBUG & DEBUG_STATS)
gs.thread4.stats_enable = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9bc5c35139..85a7706404 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ else
+ OUT_BATCH(0);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
@@ -208,7 +211,7 @@ static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- unsigned int len = BRW_IS_G4X(brw) ? 6 : 5;
+ unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
if (region == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -219,7 +222,7 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -241,6 +244,8 @@ static void emit_depthbuffer(struct brw_context *brw)
return;
}
+ assert(region->tiling != I915_TILING_X);
+
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -256,7 +261,7 @@ static void emit_depthbuffer(struct brw_context *brw)
((region->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -369,7 +374,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
- if (!BRW_IS_G4X(brw))
+ if (BRW_IS_965(brw))
return;
/* use legacy aa line coverage computation */
@@ -506,14 +511,27 @@ static void upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- BEGIN_BATCH(6, IGNORE_CLIPRECTS);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_BATCH(1); /* Surface state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* General state upper bound */
- OUT_BATCH(1); /* Indirect object upper bound */
- ADVANCE_BATCH();
+ if (BRW_IS_IGDNG(brw)) {
+ BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+ }
}
const struct brw_tracked_state brw_state_base_address = {
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index c3c85978f4..e1c2c7777b 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw)
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+ /* _NEW_HINT */
+ key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
@@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw)
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
- .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT),
+ .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
.brw = (BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
},
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index 1c0fb70fe0..6426b6df9f 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -51,7 +51,8 @@ struct brw_sf_prog_key {
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
- GLuint pad:10;
+ GLuint linear_color:1; /**< linear interp vs. perspective interp */
+ GLuint pad:25;
GLenum SpriteOrigin;
};
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index 862835f157..ca8f97f9f9 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -151,6 +151,8 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint jmpi = 1;
+
if (!nr)
return;
@@ -159,18 +161,21 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
brw_push_insn_state(p);
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
copy_colors(c, c->vert[2], c->vert[0]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
copy_colors(c, c->vert[0], c->vert[1]);
copy_colors(c, c->vert[2], c->vert[1]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
copy_colors(c, c->vert[0], c->vert[2]);
copy_colors(c, c->vert[1], c->vert[2]);
@@ -184,7 +189,8 @@ static void do_flatshade_line( struct brw_sf_compile *c )
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
-
+ GLuint jmpi = 1;
+
if (!nr)
return;
@@ -193,13 +199,16 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
brw_push_insn_state(p);
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr));
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
copy_colors(c, c->vert[0], c->vert[1]);
brw_pop_insn_state(p);
@@ -218,7 +227,7 @@ static void alloc_regs( struct brw_sf_compile *c )
/* Values computed by fixed function unit:
*/
- c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
c->det = brw_vec1_grf(1, 2);
c->dx0 = brw_vec1_grf(1, 3);
c->dx2 = brw_vec1_grf(1, 4);
@@ -295,9 +304,6 @@ static void invert_det( struct brw_sf_compile *c)
}
-#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \
- FRAG_BIT_COL0 | \
- FRAG_BIT_COL1)
static GLboolean calculate_masks( struct brw_sf_compile *c,
GLuint reg,
@@ -306,9 +312,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
GLushort *pc_linear)
{
GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
- GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
+ GLuint persp_mask;
GLuint linear_mask;
+ if (c->key.do_flat_shading || c->key.linear_color)
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
+ FRAG_BIT_COL0 |
+ FRAG_BIT_COL1);
+ else
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
+
if (c->key.do_flat_shading)
linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
else
@@ -674,7 +687,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
@@ -695,7 +708,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
@@ -708,7 +721,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index c99918724b..b5f6371c82 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -162,7 +162,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
{
struct brw_sf_unit_state sf;
dri_bo *bo;
-
+ int chipset_max_threads;
memset(&sf, 0, sizeof(sf));
sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
@@ -171,13 +171,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
sf.thread3.dispatch_grf_start_reg = 3;
- sf.thread3.urb_entry_read_offset = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ sf.thread3.urb_entry_read_offset = 3;
+ else
+ sf.thread3.urb_entry_read_offset = 1;
+
sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
- /* Each SF thread produces 1 PUE, and there can be up to 24 threads */
- sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1;
+
+ /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+ * 48(IGDNG) threads
+ */
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 48;
+ else
+ chipset_max_threads = 24;
+
+ sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
sf.thread4.max_threads = 0;
@@ -233,7 +246,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
- /* _NEW_POINT */
+ /* _NEW_BUFFERS */
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
if (!key->render_to_fbo) {
/* Rendering to an OpenGL window */
@@ -263,6 +276,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
}
/* XXX clamp max depends on AA vs. non-AA */
+ /* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
sf.sf7.use_point_size_state = !key->point_attenuated;
@@ -328,7 +342,8 @@ const struct brw_tracked_state brw_sf_unit = {
.mesa = (_NEW_POLYGON |
_NEW_LINE |
_NEW_POINT |
- _NEW_SCISSOR),
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
.brw = BRW_NEW_URB_FENCE,
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 81b0a45998..bf9f6cae55 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp;
const struct brw_tracked_state brw_state_base_address;
const struct brw_tracked_state brw_urb_fence;
const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
const struct brw_tracked_state brw_vs_prog;
const struct brw_tracked_state brw_vs_unit;
const struct brw_tracked_state brw_wm_input_sizes;
const struct brw_tracked_state brw_wm_prog;
const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
const struct brw_tracked_state brw_wm_surfaces;
const struct brw_tracked_state brw_wm_unit;
@@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+ GLenum target, depthmode;
+ dri_bo *bo;
+ GLint format, internal_format;
+ GLint first_level, last_level;
+ GLint width, height, depth;
+ GLint pitch, cpp;
+ uint32_t tiling;
+ GLuint offset;
+};
+
/***********************************************************************
* brw_state.c
*/
@@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
-void brw_init_cache( struct brw_context *brw );
-void brw_destroy_cache( struct brw_context *brw );
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
@@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
void brw_destroy_batch_cache( struct brw_context *brw );
void brw_clear_batch_cache_flush( struct brw_context *brw );
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key );
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index d5b5166406..e40d7a0416 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -56,9 +56,9 @@
* incorrect program is run for the other instance.
*/
+#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "main/imports.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
@@ -69,8 +69,10 @@
#include "brw_sf.h"
#include "brw_gs.h"
-static GLuint hash_key( const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
GLuint *ikey = (GLuint *)key;
GLuint hash = 0, i;
@@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
return hash;
}
+
/**
* Marks a new buffer as being chosen for the given cache id.
*/
@@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
+
static struct brw_cache_item *
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
GLuint hash, const void *key, GLuint key_size,
@@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
}
-static void rehash( struct brw_cache *cache )
+static void
+rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
@@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
cache->size = size;
}
+
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
-dri_bo *brw_search_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
- void *aux_return )
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+ void *aux_return)
{
struct brw_cache_item *item;
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
return item->bo;
}
+
dri_bo *
brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
@@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
-/* This doesn't really work with aux data. Use search/upload instead
+
+/**
+ * This doesn't really work with aux data. Use search/upload instead
*/
dri_bo *
brw_cache_data_sz(struct brw_cache *cache,
@@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
+
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
*
@@ -319,21 +330,22 @@ enum pool_type {
DW_GENERAL_STATE
};
+
static void
-brw_init_cache_id( struct brw_context *brw,
- const char *name,
- enum brw_cache_id id,
- GLuint key_size,
- GLuint aux_size)
+brw_init_cache_id(struct brw_cache *cache,
+ const char *name,
+ enum brw_cache_id id,
+ GLuint key_size,
+ GLuint aux_size)
{
- struct brw_cache *cache = &brw->cache;
-
cache->name[id] = strdup(name);
cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
-void brw_init_cache( struct brw_context *brw )
+
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
@@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
- _mesa_calloc(cache->size *
- sizeof(struct brw_cache_item));
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
sizeof(struct brw_cc_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
sizeof(struct brw_cc_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
sizeof(struct brw_sampler_default_color),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
0, /* variable key/data size */
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
sizeof(struct brw_wm_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
sizeof(struct brw_sf_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
sizeof(struct brw_sf_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
sizeof(struct brw_vs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
sizeof(struct brw_clip_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
sizeof(struct brw_gs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->surface_cache;
+
+ cache->brw = brw;
- brw_init_cache_id(brw,
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
sizeof(struct brw_surface_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
0,
0);
}
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+ brw_init_non_surface_cache(brw);
+ brw_init_surface_cache(brw);
+}
+
+
static void
-brw_clear_cache( struct brw_context *brw )
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
@@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
- for (i = 0; i < brw->cache.size; i++) {
- for (c = brw->cache.items[i]; c; c = next) {
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
int j;
next = c->next;
@@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
free((void *)c->key);
free(c);
}
- brw->cache.items[i] = NULL;
+ cache->items[i] = NULL;
}
- brw->cache.n_items = 0;
+ cache->n_items = 0;
if (brw->curbe.last_buf) {
_mesa_free(brw->curbe.last_buf);
@@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw )
brw->state.dirty.cache |= ~0;
}
-void brw_state_cache_check_size( struct brw_context *brw )
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
{
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
*/
if (brw->cache.n_items > 1000)
- brw_clear_cache(brw);
+ brw_clear_cache(brw, &brw->cache);
+
+ if (brw->surface_cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->surface_cache);
}
-void brw_destroy_cache( struct brw_context *brw )
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
GLuint i;
- brw_clear_cache(brw);
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
- dri_bo_unreference(brw->cache.last_bo[i]);
- free(brw->cache.name[i]);
+ dri_bo_unreference(cache->last_bo[i]);
+ free(cache->name[i]);
}
- free(brw->cache.items);
- brw->cache.items = NULL;
- brw->cache.size = 0;
+ free(cache->items);
+ cache->items = NULL;
+ cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+ brw_destroy_cache(brw, &brw->cache);
+ brw_destroy_cache(brw, &brw->surface_cache);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index a713262269..e94fa7d2b4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw)
surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
state_out(name, surf, surfoff, 4, "mip base %d\n",
surf->ss4.min_lod);
+ state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+ surf->ss5.x_offset, surf->ss5.y_offset);
dri_bo_unmap(surf_bo);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 5de1450e61..38d9dd8991 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
-
&brw_cc_vp,
&brw_cc_unit,
- &brw_wm_surfaces, /* must do before samplers */
+ &brw_vs_surfaces, /* must do before unit */
+ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
&brw_wm_samplers,
&brw_wm_unit,
@@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] =
&brw_line_stipple,
&brw_aa_line_parameters,
- /* Ordering of the commands below is documented as fixed.
- */
-#if 0
- &brw_pipelined_state_pointers,
- &brw_urb_fence,
- &brw_constant_buffer_state,
-#else
+
&brw_psp_urb_cbs,
-#endif
&brw_drawing_rect,
&brw_indices,
&brw_vertices,
- NULL, /* brw_constant_buffer */
+ &brw_constant_buffer
};
void brw_init_state( struct brw_context *brw )
{
- GLuint i;
-
- brw_init_cache(brw);
-
- brw->state.atoms = _mesa_malloc(sizeof(atoms));
- brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
- _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
-
- /* Patch in a pointer to the dynamic state atom:
- */
- for (i = 0; i < brw->state.nr_atoms; i++)
- if (brw->state.atoms[i] == NULL)
- brw->state.atoms[i] = &brw->curbe.tracked_state;
-
- _mesa_memcpy(&brw->curbe.tracked_state,
- &brw_constant_buffer,
- sizeof(brw_constant_buffer));
+ brw_init_caches(brw);
}
void brw_destroy_state( struct brw_context *brw )
{
- if (brw->state.atoms) {
- _mesa_free(brw->state.atoms);
- brw->state.atoms = NULL;
- }
-
- brw_destroy_cache(brw);
+ brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
}
@@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
+ DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
{0, 0, 0}
};
@@ -231,7 +205,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
- DEFINE_BIT(BRW_NEW_INPUT_VARYING),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
@@ -336,7 +309,7 @@ void brw_validate_state( struct brw_context *brw )
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
@@ -367,8 +340,8 @@ void brw_upload_state(struct brw_context *brw)
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < brw->state.nr_atoms; i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.mesa ||
@@ -397,7 +370,7 @@ void brw_upload_state(struct brw_context *brw)
}
else {
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 89e2981203..8ba7eb27b3 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -33,6 +33,14 @@
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
/* Command packets:
*/
struct header
@@ -815,7 +823,9 @@ struct brw_gs_unit_state
struct
{
- GLuint pad0:10;
+ GLuint pad0:8;
+ GLuint rendering_enable:1; /* for IGDNG */
+ GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
@@ -923,6 +933,28 @@ struct brw_wm_unit_state
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
+
+ /* for IGDNG only */
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_1:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_2:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_3:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_3:26;
+ } wm10;
};
struct brw_sampler_default_color {
@@ -1075,7 +1107,7 @@ struct brw_surface_state
GLuint y_offset:4;
GLuint pad0:1;
GLuint x_offset:7;
- } ss5; /* NEW in Integrated Graphics Device */
+ } ss5; /* New in G4X */
};
@@ -1298,6 +1330,14 @@ struct brw_instruction
GLuint pad1:6;
} ia16;
+ struct
+ {
+ GLuint pad:26;
+ GLuint end_of_thread:1;
+ GLuint pad1:1;
+ GLuint sfid:4;
+ } send_igdng; /* for IGDNG only */
+
} bits2;
union
@@ -1385,6 +1425,21 @@ struct brw_instruction
} math;
struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint snapshot:1;
+ GLuint pad0:10;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } math_igdng;
+
+ struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint return_format:2;
@@ -1407,9 +1462,38 @@ struct brw_instruction
GLuint end_of_thread:1;
} sampler_g4x;
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint simd_mode:2;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } sampler_igdng;
+
struct brw_urb_immediate urb;
struct {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } urb_igdng;
+
+ struct {
GLuint binding_table_index:8;
GLuint msg_control:4;
GLuint msg_type:2;
@@ -1423,6 +1507,19 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_read_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint pixel_scoreboard_clear:1;
GLuint msg_type:3;
@@ -1435,6 +1532,20 @@ struct brw_instruction
} dp_write;
struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_write_igdng;
+
+ struct {
GLuint pad:16;
GLuint response_length:4;
GLuint msg_length:4;
@@ -1443,6 +1554,15 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ struct {
+ GLuint pad:19;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } generic_igdng;
+
GLint d;
GLuint ud;
} bits3;
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 51a617fcb4..7f9b253534 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -28,7 +28,6 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
/* Code to layout images in a mipmap tree for i965.
*/
@@ -37,17 +36,90 @@
#include "intel_tex_layout.h"
#include "intel_context.h"
#include "main/macros.h"
+#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
+GLboolean brw_miptree_layout(struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling)
{
- /* XXX: these vary depending on image format:
- */
-/* GLint align_w = 4; */
+ /* XXX: these vary depending on image format: */
+ /* GLint align_w = 4; */
switch (mt->target) {
- case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP:
+ if (IS_IGDNG(intel->intelScreen->deviceID)) {
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = mt->width0;
+ GLuint height = mt->height0;
+ GLuint qpitch = 0;
+ GLuint y_pitch = 0;
+
+ mt->pitch = mt->width0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+ y_pitch = ALIGN(height, align_h);
+
+ if (mt->compressed) {
+ mt->pitch = ALIGN(mt->width0, align_w);
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+ } else {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+ }
+
+ if (mt->first_level != mt->last_level) {
+ GLuint mip1_width;
+
+ if (mt->compressed) {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + ALIGN(minify(minify(mt->width0)), align_w);
+ } else {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + minify(minify(mt->width0));
+ }
+
+ if (mip1_width > mt->pitch) {
+ mt->pitch = mip1_width;
+ }
+ }
+
+ mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+ for (level = mt->first_level; level <= mt->last_level; level++) {
+ GLuint img_height;
+ GLuint nr_images = 6;
+ GLuint q = 0;
+
+ intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
+ height, 1);
+
+ for (q = 0; q < nr_images; q++)
+ intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+ if (mt->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = ALIGN(height, align_h);
+
+ if (level == mt->first_level + 1) {
+ x += ALIGN(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ }
+
+ break;
+ }
+
case GL_TEXTURE_3D: {
GLuint width = mt->width0;
GLuint height = mt->height0;
@@ -59,25 +131,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
GLuint align_w = 4;
mt->total_height = 0;
-
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
if (mt->compressed) {
- align_w = intel_compressed_alignment(mt->internal_format);
mt->pitch = ALIGN(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else {
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
- pack_y_pitch = ALIGN(mt->height0, align_h);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+ pack_y_pitch = ALIGN(mt->height0, align_h);
}
- pack_x_pitch = mt->pitch;
+ pack_x_pitch = width;
pack_x_nr = 1;
- for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+ for (level = mt->first_level ; level <= mt->last_level ; level++) {
GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
GLint x = 0;
GLint y = 0;
GLint q, j;
-
+
intel_miptree_set_level_info(mt, level, nr_images,
0, mt->total_height,
width, height, depth);
@@ -89,7 +161,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
}
x = 0;
- y += pack_y_pitch;
+ y += pack_y_pitch;
}
@@ -98,40 +170,40 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
height = minify(height);
depth = minify(depth);
- if (mt->compressed) {
- pack_y_pitch = (height + 3) / 4;
-
- if (pack_x_pitch > ALIGN(width, align_w)) {
- pack_x_pitch = ALIGN(width, align_w);
- pack_x_nr <<= 1;
- }
- } else {
- if (pack_x_pitch > 4) {
- pack_x_pitch >>= 1;
- pack_x_nr <<= 1;
- assert(pack_x_pitch * pack_x_nr <= mt->pitch);
- }
-
- if (pack_y_pitch > 2) {
- pack_y_pitch >>= 1;
- pack_y_pitch = ALIGN(pack_y_pitch, align_h);
- }
- }
+ if (mt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > ALIGN(width, align_w)) {
+ pack_x_pitch = ALIGN(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ }
+ }
}
break;
}
default:
- i945_miptree_layout_2d(intel, mt);
+ i945_miptree_layout_2d(intel, mt, tiling);
break;
}
- DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
- mt->pitch,
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ mt->pitch,
mt->total_height,
mt->cpp,
mt->pitch * mt->total_height * mt->cpp );
-
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 7673dd36eb..8c6f4355a6 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -143,7 +143,29 @@ static void recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
+
+ brw->urb.constrained = 0;
+
+ if (BRW_IS_IGDNG(brw)) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (BRW_IS_G4X(brw)) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
@@ -169,9 +191,8 @@ static void recalculate_urb_fence( struct brw_context *brw )
if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
_mesa_printf("URB CONSTRAINED\n");
}
- else
- brw->urb.constrained = 0;
+done:
if (INTEL_DEBUG & DEBUG_URB)
_mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 1e4f66091e..4a591365c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -58,6 +58,7 @@ struct brw_vs_compile {
GLuint first_output;
GLuint nr_outputs;
+ GLuint first_overflow_output; /**< VERT_ATTRIB_x */
GLuint first_tmp;
GLuint last_tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b69616d6e5..514f15d5e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
-#if 0
- if (c->vp->program.Base.Parameters->NumParameters >= 6)
- c->vp->use_const_buffer = 1;
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The later is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->program.Base.Parameters->NumParameters +
+ c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
else
-#endif
c->vp->use_const_buffer = GL_FALSE;
- /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* r0 -- reserved as usual
*/
@@ -124,15 +129,21 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
}
- /* Allocate outputs: TODO: could organize the non-position outputs
- * to go straight into message regs.
+ /* Allocate outputs. The non-position outputs go straight into message regs.
*/
c->nr_outputs = 0;
c->first_output = reg;
- mrf = 4;
+ c->first_overflow_output = 0;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ mrf = 8;
+ else
+ mrf = 4;
+
for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & (1 << i)) {
c->nr_outputs++;
+ assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
if (i == VERT_RESULT_HPOS) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
@@ -143,8 +154,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
- c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
+ if (mrf < 16) {
+ c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+ else {
+ /* too many vertex results to fit in MRF, use GRF for overflow */
+ if (!c->first_overflow_output)
+ c->first_overflow_output = i;
+ c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
}
}
}
@@ -201,7 +221,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
- c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
+ if (BRW_IS_IGDNG(c->func.brw))
+ c->prog_data.urb_entry_size = (c->nr_outputs + 6 + 3) / 4;
+ else
+ c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
+
c->prog_data.total_grf = reg;
if (INTEL_DEBUG & DEBUG_VS) {
@@ -709,10 +733,11 @@ get_constant(struct brw_vs_compile *c,
struct brw_compile *p = &c->func;
struct brw_reg const_reg;
struct brw_reg const2_reg;
+ const GLboolean relAddr = src->RelAddr;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
+ if (c->current_const[argIndex].index != src->Index || relAddr) {
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
c->current_const[argIndex].index = src->Index;
@@ -725,13 +750,13 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
c->current_const[argIndex].reg,/* writeback dest */
0, /* oword */
- src->RelAddr, /* relative indexing? */
+ relAddr, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
- if (src->RelAddr) {
+ if (relAddr) {
/* second read */
const2_reg = get_tmp(c);
@@ -742,7 +767,7 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
const2_reg, /* writeback dest */
1, /* oword */
- src->RelAddr, /* relative indexing? */
+ relAddr, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER
@@ -752,7 +777,7 @@ get_constant(struct brw_vs_compile *c,
const_reg = c->current_const[argIndex].reg;
- if (src->RelAddr) {
+ if (relAddr) {
/* merge the two Owords into the constant register */
/* const_reg[7..4] = const2_reg[7..4] */
brw_MOV(p,
@@ -1061,6 +1086,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
+ int eot;
+ GLuint len_vertext_header = 2;
if (c->key.copy_edgeflag) {
brw_MOV(p,
@@ -1070,14 +1097,16 @@ static void emit_vertex_write( struct brw_vs_compile *c)
/* Build ndc coords */
ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
- c->key.nr_userclip || !BRW_IS_G4X(p->brw))
+ c->key.nr_userclip || BRW_IS_965(p->brw))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1108,7 +1137,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1135,7 +1164,23 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+ /* m4, m5 contain the distances from vertex to the user clip planeXXX.
+ * Seems it is useless for us.
+ * m6 is used for aligning, so that the remainder of vertex element is
+ * reg-aligned.
+ */
+ brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+ len_vertext_header = 6;
+ } else {
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertext_header = 2;
+ }
+
+ eot = (c->first_overflow_output == 0);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -1143,12 +1188,43 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- c->nr_outputs + 3, /* msg len */
+ MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
- 1, /* eot */
+ eot, /* eot */
1, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
+
+ if (c->first_overflow_output > 0) {
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ /* XXX I'm not 100% sure about which MRF regs to use here. Starting
+ * at mrf[4] atm...
+ */
+ GLuint i, mrf = 0;
+ for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+ if (c->prog_data.outputs_written & (1 << i)) {
+ /* move from GRF to MRF */
+ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ mrf++;
+ }
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 4, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ mrf+1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ BRW_MAX_MRF-1, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+ }
}
@@ -1177,15 +1253,15 @@ post_vs_emit( struct brw_vs_compile *c,
*/
void brw_vs_emit(struct brw_vs_compile *c )
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
- GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, if_insn = 0;
+ const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+ GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IFSN];
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
GLuint file;
@@ -1219,7 +1295,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
for (insn = 0; insn < nr_insns; insn++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
struct brw_reg args[3], dst;
GLuint i;
@@ -1232,7 +1308,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
*/
if (inst->Opcode != OPCODE_SWZ)
for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
+ const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
@@ -1376,16 +1452,51 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_xpd(p, dst, args[0], args[1]);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
+#if 0
+ case OPCODE_BGNLOOP:
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+#else
+ (void) loop_inst;
+ (void) loop_depth;
+#endif
case OPCODE_BRA:
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 3d29538843..d790ab6555 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -97,7 +97,11 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
- vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
@@ -105,10 +109,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ if (BRW_IS_IGDNG(brw))
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ else
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 72;
+ else if (BRW_IS_G4X(brw))
chipset_max_threads = 32;
else
chipset_max_threads = 16;
@@ -120,6 +130,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
/* No samplers for ARB_vp programs:
*/
+ /* It has to be set to 0 for IGDNG
+ */
vs.vs5.sampler_count = 0;
if (INTEL_DEBUG & DEBUG_STATS)
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000000..89f47522a1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+ assert(surf == 0);
+
+ /* If we're in this state update atom, we need to update VS constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(vp->const_buffer);
+ vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (vp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = NULL;
+ return;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = vp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
+ uint32_t *data = malloc(data_size);
+ int i;
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++)
+ if (brw->vs.surf_bo[i])
+ data[i] = brw->vs.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ /* The presumed offsets were set in the data values for
+ * brw_upload_cache.
+ */
+ drm_intel_bo_emit_reloc(bind_bo, i * 4,
+ brw->vs.surf_bo[i], 0,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+ }
+
+ free(data);
+ }
+
+ return bind_bo;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ int i;
+ int nr_surfaces = 0;
+
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ nr_surfaces = i + 1;
+ }
+ }
+
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
+
+ /* Note that we don't end up updating the bind_bo if we don't have a
+ * surface to be pointing at. This should be relatively harmless, as it
+ * just slightly increases our working set size.
+ */
+ if (brw->vs.nr_surfaces != 0) {
+ dri_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+ }
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index ba03afd6c1..ac11790151 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -177,14 +177,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence )
brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
}
-
-static void brw_note_unlock( struct intel_context *intel )
-{
- struct brw_context *brw = brw_context(&intel->ctx);
- brw_state_cache_check_size(brw);
-}
-
-
/* called from intelWaitForIdle() and intelFlush()
*
* For now, just flush everything. Could be smarter later.
@@ -194,7 +186,7 @@ static GLuint brw_flush_cmd( void )
struct brw_mi_flush flush;
flush.opcode = CMD_MI_FLUSH;
flush.pad = 0;
- flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE;
+ flush.flags = BRW_FLUSH_STATE_CACHE;
return *(GLuint *)&flush;
}
@@ -215,7 +207,6 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.invalidate_state = brw_invalidate_state;
brw->intel.vtbl.note_fence = brw_note_fence;
- brw->intel.vtbl.note_unlock = brw_note_unlock;
brw->intel.vtbl.new_batch = brw_new_batch;
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 8a3b7df9c7..14e05be4f6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -272,6 +272,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+ /* _NEW_HINT */
+ key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
/* _NEW_TEXTURE */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
@@ -351,6 +354,7 @@ const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_DEPTH |
+ _NEW_HINT |
_NEW_STENCIL |
_NEW_POLYGON |
_NEW_LINE |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 295fed851b..ba497432c6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -63,6 +63,7 @@ struct brw_wm_prog_key {
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint flat_shade:1;
+ GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
@@ -241,15 +242,20 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
/** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
struct brw_reg stack;
struct brw_reg emit_mask_reg;
- GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index a870e75e52..9f82916c02 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -353,6 +353,19 @@ static void emit_mad( struct brw_compile *p,
}
}
+static void emit_trunc( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_RNDZ(p, dst[i], arg0[i]);
+ }
+ }
+}
static void emit_lrp( struct brw_compile *p,
const struct brw_reg *dst,
@@ -701,6 +714,7 @@ static void emit_tex( struct brw_wm_compile *c,
GLuint msgLength, responseLength;
GLuint i, nr;
GLuint emit;
+ GLuint msg_type;
/* How many input regs are there?
*/
@@ -738,6 +752,18 @@ static void emit_tex( struct brw_wm_compile *c,
responseLength = 8; /* always */
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ } else {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ }
+
brw_SAMPLE(p,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
@@ -745,12 +771,12 @@ static void emit_tex( struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
- (inst->tex_shadow ?
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
+ msg_type,
responseLength,
msgLength,
- 0);
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
}
@@ -762,7 +788,7 @@ static void emit_txb( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
GLuint msgLength;
-
+ GLuint msg_type;
/* Shadow ignored for txb.
*/
switch (inst->tex_idx) {
@@ -787,6 +813,11 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(8), arg[3]);
msgLength = 9;
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
brw_SAMPLE(p,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
@@ -794,10 +825,12 @@ static void emit_txb( struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ msg_type,
8, /* responseLength */
msgLength,
- 0);
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
}
@@ -1009,7 +1042,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
@@ -1222,6 +1255,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
+ case OPCODE_TRUNC:
+ emit_trunc(p, dst, dst_flags, args[0]);
+ break;
+
case OPCODE_LRP:
emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 49aad281d7..b9e8dd2e96 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -354,13 +354,25 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef());
}
else {
- emit_op(c,
- WM_LINTERP,
- dst,
- 0,
- interp,
- deltas,
- src_undef());
+ if (c->key.linear_color) {
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ src_undef());
+ }
+ else {
+ /* perspective-corrected color interpolation */
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ }
}
break;
case FRAG_ATTRIB_FOGC:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index a907e1b788..19f777fe32 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,5 +1,7 @@
#include "main/macros.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@@ -21,7 +23,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
- case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
@@ -42,6 +43,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
}
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
/**
* Record the mapping of a Mesa register to a hardware register.
*/
@@ -68,11 +146,23 @@ static int get_scalar_dst_index(const struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- if(c->tmp_index == c->tmp_max)
- c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
-
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
return reg;
+
}
/**
@@ -130,35 +220,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
return brw_null_reg();
}
+ assert(index < 256);
+ assert(component < 4);
+
/* see if we've already allocated a HW register for this Mesa register */
if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
}
else {
/* no, allocate new register */
- reg = brw_vec8_grf(c->reg_index, 0);
- }
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
- /* if this is a new register allocation, record it in the table */
- if (!c->wm_regs[file][index][component].inited) {
- set_reg(c, file, index, component, reg);
- c->reg_index++;
- }
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
- if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
- /* ran out of temporary registers! */
-#if 1
- /* This is a big hack for now.
- * Return bad register index, just don't hang the GPU.
- */
- _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
- c->reg_index = BRW_WM_MAX_GRF - 13;
-#else
- return brw_null_reg();
-#endif
+ set_reg(c, file, index, component, reg);
}
-
+
if (neg & (1 << component)) {
reg = negate(reg);
}
@@ -168,6 +252,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
}
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[MAX_PROGRAM_TEMPS];
+ GLint intEnd[MAX_PROGRAM_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
/**
* Preallocate registers. This sets up the Mesa to hardware register
* mapping for certain registers, such as constants (uniforms/state vars)
@@ -179,6 +303,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
struct brw_reg reg;
int urb_read_length = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
if (i < c->key.nr_depth_regs)
@@ -187,14 +315,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2 * c->key.nr_depth_regs;
+ reg_index += 2 * c->key.nr_depth_regs;
/* constants */
{
- const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
/* use a real constant buffer, or just use a section of the GRF? */
- c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
if (c->fp->use_const_buffer) {
/* We'll use a real constant buffer and fetch constants from
@@ -216,7 +350,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (i = 0; i < nr_params; i++) {
/* loop over XYZW channels */
for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
/* Save pointer to parameter/constant value.
* Constants will be copied in prepare_constant_buffer()
*/
@@ -226,7 +360,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
/* number of constant regs used (each reg is float[8]) */
c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
- c->reg_index += c->nr_creg;
+ reg_index += c->nr_creg;
}
}
@@ -242,23 +376,31 @@ static void prealloc_reg(struct brw_wm_compile *c)
fp_input = -1;
if (fp_input >= 0 && inputs & (1 << fp_input)) {
- urb_read_length = c->reg_index;
- reg = brw_vec8_grf(c->reg_index, 0);
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
}
if (c->key.vp_outputs_written & (1 << i)) {
- c->reg_index += 2;
+ reg_index += 2;
}
}
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index += 2;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
/* An instruction may reference up to three constants.
* They'll be found in these registers.
@@ -267,12 +409,12 @@ static void prealloc_reg(struct brw_wm_compile *c)
if (c->fp->use_const_buffer) {
for (i = 0; i < 3; i++) {
c->current_const[i].index = -1;
- c->current_const[i].reg = alloc_tmp(c);
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
}
}
#if 0
printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
#endif
}
@@ -376,6 +518,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
const GLuint nr = 1;
const GLuint component = GET_SWZ(src->Swizzle, channel);
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
if (c->fp->use_const_buffer &&
(src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
@@ -673,27 +823,26 @@ static void emit_fb_write(struct brw_wm_compile *c,
}
if (c->key.dest_depth_reg) {
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
- assert(comp == 1);
- assert(off == 0);
-#if 0
- /* XXX do we need this code? comp always 1, off always 0, it seems */
if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
/* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
brw_pop_insn_state(p);
}
else
-#endif
{
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
}
nr += 2;
}
@@ -2476,6 +2625,7 @@ static void emit_txb(struct brw_wm_compile *c,
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
GLuint i;
+ GLuint msg_type;
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
@@ -2504,6 +2654,14 @@ static void emit_txb(struct brw_wm_compile *c,
}
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
@@ -2511,10 +2669,12 @@ static void emit_txb(struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
}
@@ -2528,6 +2688,7 @@ static void emit_tex(struct brw_wm_compile *c,
GLuint i, nr;
GLuint emit;
GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
@@ -2568,6 +2729,16 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
@@ -2575,10 +2746,12 @@ static void emit_tex(struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
shadow ? 6 : 4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
if (shadow)
brw_MOV(p, dst[3], brw_imm_f(1.0));
@@ -2595,15 +2768,15 @@ static void post_wm_emit( struct brw_wm_compile *c )
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
- struct brw_instruction *inst0, *inst1;
- int i, if_insn = 0, loop_insn = 0;
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
- c->reg_index = 0;
+ c->out_of_regs = GL_FALSE;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
@@ -2611,6 +2784,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
+ c->cur_inst = i;
+
#if 0
_mesa_printf("Inst %d: ", i);
_mesa_print_instruction(inst);
@@ -2770,15 +2945,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_kil(c);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
case OPCODE_BGNSUB:
brw_save_label(p, inst->Comment, p->nr_insn);
@@ -2812,7 +2987,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
break;
case OPCODE_BGNLOOP:
/* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
brw_BREAK(p);
@@ -2823,36 +2998,40 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
- loop_insn--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
- /* patch all the BREAK instructions from
- last BEGINLOOP */
- while (inst0 > loop_inst[loop_insn]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
- } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
- }
- }
- break;
+ }
+ }
+ }
+ break;
default:
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
-
- if (c->reg_index >= BRW_WM_MAX_GRF) {
- _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
- /* XXX we need to do some proper error recovery here */
- }
}
@@ -2876,6 +3055,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_print_program(c, "brw_wm_glsl_emit done");
}
- c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_grf = num_grf_used(c);
c->prog_data.total_scratch = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index bd60ac9b31..8fd067abe7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -116,6 +116,10 @@ const struct {
{ C, 0, 1, 1, 1 }
};
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags
+ */
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
struct brw_wm_prog_key *key )
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index ab9aa2f10d..3436a24717 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_FRC:
case OPCODE_MOV:
case OPCODE_SWZ:
+ case OPCODE_TRUNC:
read0 = writemask;
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 67b41173fb..39f8c6d522 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -71,7 +71,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_IGDNG(brw))
+ key->max_threads = 12 * 6;
+ else if (BRW_IS_G4X(brw))
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
@@ -141,7 +143,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ wm.thread1.binding_table_entry_count = key->nr_surfaces;
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
@@ -158,7 +164,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+ if (BRW_IS_IGDNG(brw))
+ wm.wm4.sampler_count = 0; /* hardware requirement */
+ else
+ wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
if (brw->wm.sampler_bo != NULL) {
/* reloc */
wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 805df8a4af..096f74394e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
}
}
-
-/**
- * Use same key for WM and VS surfaces.
- */
-struct brw_surface_key {
- GLenum target, depthmode;
- dri_bo *bo;
- GLint format, internal_format;
- GLint first_level, last_level;
- GLint width, height, depth;
- GLint pitch, cpp;
- uint32_t tiling;
- GLuint offset;
-};
-
-
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.cube_neg_z = 1;
}
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.tiling = intelObj->mt->region->tiling;
dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
@@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
-static dri_bo *
+dri_bo *
brw_create_constant_surface( struct brw_context *brw,
struct brw_surface_key *key )
{
@@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw,
return bo;
}
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!fp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
/**
* Update the surface state for a WM constant buffer.
* The constant buffer will be (re)allocated here if needed.
*/
-static dri_bo *
+static void
brw_update_wm_constant_surface( GLcontext *ctx,
- GLuint surf,
- dri_bo *const_buffer,
- const struct gl_program_parameter_list *params)
+ GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
struct brw_surface_key key;
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params =
+ fp->program.Base.Parameters;
- /* free old const buffer if too small */
- if (const_buffer && const_buffer->size < size) {
- dri_bo_unreference(const_buffer);
- const_buffer = NULL;
- }
+ /* If we're in this state update atom, we need to update WM constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
- /* alloc new buffer if needed */
- if (!const_buffer) {
- const_buffer =
- drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ return;
}
memset(&key, 0, sizeof(key));
key.format = MESA_FORMAT_RGBA_FLOAT32;
key.internal_format = GL_RGBA;
- key.bo = const_buffer;
+ key.bo = fp->const_buffer;
key.depthmode = GL_NONE;
key.pitch = params->NumParameters;
key.width = params->NumParameters;
@@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx,
*/
dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
}
-
- return const_buffer;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
-
/**
- * Update the surface state for a VS constant buffer.
- * The constant buffer will be (re)allocated here if needed.
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
*/
-static dri_bo *
-brw_update_vs_constant_surface( GLcontext *ctx,
- GLuint surf,
- dri_bo *const_buffer,
- const struct gl_program_parameter_list *params)
+static void prepare_wm_constant_surface(struct brw_context *brw )
{
- struct brw_context *brw = brw_context(ctx);
- struct brw_surface_key key;
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- assert(surf == 0);
-
- /* free old const buffer if too small */
- if (const_buffer && const_buffer->size < size) {
- dri_bo_unreference(const_buffer);
- const_buffer = NULL;
- }
-
- /* alloc new buffer if needed */
- if (!const_buffer) {
- const_buffer =
- drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
- }
-
- memset(&key, 0, sizeof(key));
-
- key.format = MESA_FORMAT_RGBA_FLOAT32;
- key.internal_format = GL_RGBA;
- key.bo = const_buffer;
- key.depthmode = GL_NONE;
- key.pitch = params->NumParameters;
- key.width = params->NumParameters;
- key.height = 1;
- key.depth = 1;
- key.cpp = 16;
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
- /*
- printf("%s:\n", __FUNCTION__);
- printf(" width %d height %d depth %d cpp %d pitch %d\n",
- key.width, key.height, key.depth, key.cpp, key.pitch);
- */
+ drm_intel_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
- dri_bo_unreference(brw->vs.surf_bo[surf]);
- brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->vs.surf_bo[surf] == NULL) {
- brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ if (brw->wm.surf_bo[surf] != NULL) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ }
+ return;
}
- return const_buffer;
+ brw_update_wm_constant_surface(ctx, surf);
}
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
+
/**
* Sets up a surface state structure to point at the given region.
@@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
- unsigned int unit, GLboolean cached)
+ unsigned int unit)
{
GLcontext *ctx = &brw->intel.ctx;
dri_bo *region_bo = NULL;
@@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
ctx->Color.BlendEnabled);
dri_bo_unreference(brw->wm.surf_bo[unit]);
- brw->wm.surf_bo[unit] = NULL;
- if (cached)
- brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &region_bo, 1,
- NULL);
+ brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &region_bo, 1,
+ NULL);
if (brw->wm.surf_bo[unit] == NULL) {
struct brw_surface_state surf;
@@ -581,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
- surf.ss1.base_addr = key.draw_offset;
+ if (key.tiling == I915_TILING_NONE) {
+ surf.ss1.base_addr = key.draw_offset;
+ } else {
+ uint32_t tile_offset = key.draw_offset % 4096;
+
+ surf.ss1.base_addr = key.draw_offset - tile_offset;
+
+ assert(BRW_IS_G4X(brw) || tile_offset == 0);
+ if (BRW_IS_G4X(brw)) {
+ if (key.tiling == I915_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 128 / 2;
+ }
+ }
+ }
if (region_bo != NULL)
surf.ss1.base_addr += region_bo->offset; /* reloc */
@@ -598,7 +615,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.writedisable_alpha = !key.color_mask[3];
/* Key size will never match key size for textures, so we're safe. */
- brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
@@ -611,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
offsetof(struct brw_surface_state, ss1),
region_bo,
- key.draw_offset,
+ surf.ss1.base_addr,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
}
@@ -630,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
- bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
NULL);
@@ -646,7 +664,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
else
data[i] = 0;
- bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
data, data_size,
@@ -682,27 +700,17 @@ static void prepare_wm_surfaces(struct brw_context *brw )
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
brw_update_renderbuffer_surface(brw,
ctx->DrawBuffer->_ColorDrawBuffers[i],
- i,
- GL_FALSE);
+ i);
}
} else {
- brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE);
+ brw_update_renderbuffer_surface(brw, NULL, 0);
}
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
- /* Update surface / buffer for fragment shader constant buffer */
- {
- const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- fp->const_buffer =
- brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
- fp->program.Base.Parameters);
-
- brw->wm.nr_surfaces = surf + 1;
- }
+ if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+ brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
/* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
@@ -735,100 +743,16 @@ static void prepare_wm_surfaces(struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
-
-/**
- * Constructs the binding table for the VS surface state.
- */
-static dri_bo *
-brw_vs_get_binding_table(struct brw_context *brw)
-{
- dri_bo *bind_bo;
-
- assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
-
- bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, brw->vs.nr_surfaces,
- NULL);
-
- if (bind_bo == NULL) {
- GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
- uint32_t *data = malloc(data_size);
- int i;
-
- for (i = 0; i < brw->vs.nr_surfaces; i++)
- if (brw->vs.surf_bo[i])
- data[i] = brw->vs.surf_bo[i]->offset;
- else
- data[i] = 0;
-
- bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, brw->vs.nr_surfaces,
- data, data_size,
- NULL, NULL);
-
- /* Emit binding table relocations to surface state */
- for (i = 0; i < BRW_VS_MAX_SURF; i++) {
- if (brw->vs.surf_bo[i] != NULL) {
- dri_bo_emit_reloc(bind_bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- i * sizeof(GLuint),
- brw->vs.surf_bo[i]);
- }
- }
-
- free(data);
- }
-
- return bind_bo;
-}
-
-
-/**
- * Vertex shader surfaces. Just constant buffer for now. Could add vertex
- * shader textures in the future.
- */
-static void prepare_vs_surfaces(struct brw_context *brw )
-{
- GLcontext *ctx = &brw->intel.ctx;
-
- /* Update surface / buffer for vertex shader constant buffer */
- {
- const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *) brw->vertex_program;
- vp->const_buffer =
- brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
- vp->program.Base.Parameters);
-
- brw->vs.nr_surfaces = 1;
- }
-
- dri_bo_unreference(brw->vs.bind_bo);
- brw->vs.bind_bo = brw_vs_get_binding_table(brw);
-
- if (1)
- brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-}
-
-
-static void
-prepare_surfaces(struct brw_context *brw)
-{
- prepare_wm_surfaces(brw);
- prepare_vs_surfaces(brw);
-}
-
-
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = (_NEW_COLOR |
+ _NEW_TEXTURE |
+ _NEW_BUFFERS),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
.cache = 0
},
- .prepare = prepare_surfaces,
+ .prepare = prepare_wm_surfaces,
};
diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
new file mode 120000
index 0000000000..4c6b37ada0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
@@ -0,0 +1 @@
+../intel/intel_generatemipmap.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
new file mode 120000
index 0000000000..cc4589f4d4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -0,0 +1 @@
+../intel/intel_pixel_read.c \ No newline at end of file