From cab307ce6b32d2ffdb0eb3bb5bae93c6fb9305fb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 May 2009 09:14:24 -0600 Subject: i965: rename var: s/tmp/vs_inputs/ (cherry picked from commit 840c09fc71542fdfc71edd2a2802925d467567bb) --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b91b20bec6..1b8bcc14ec 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; @@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; GLuint nr_enabled = 0; /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1< Date: Tue, 23 Jun 2009 19:30:25 -0700 Subject: i965: Set the max index buffer address correctly according to the docs. It's the last addressable byte, not the byte after the end of the buffer. (cherry picked from commit b72dea5441e8e9226dabf1826fa3bc129c7bc281) --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 1b8bcc14ec..3ef56a0068 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -635,7 +635,7 @@ static void brw_emit_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_size = get_size(index_buffer->type) * index_buffer->count - 1; /* Emit the indexbuffer packet: */ -- cgit v1.2.3 From 7e26bdb849b75f4aeb69cf8b1fdffbc461265490 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 14:46:18 -0700 Subject: i965: Calculate enabled[] and nr_enabled once and re-use the values. The code duplication bothered me. (cherry picked from commit 9b9cb30d128fc5f1ba77287696ecd508e640efde) --- src/mesa/drivers/dri/i965/brw_context.h | 3 ++ src/mesa/drivers/dri/i965/brw_draw_upload.c | 44 ++++++++++------------------- 2 files changed, 18 insertions(+), 29 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 577497bf6b..e52fc3f374 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -477,6 +477,9 @@ struct brw_context struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 3ef56a0068..c1fe85908b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -350,9 +350,6 @@ static void brw_prepare_vertices(struct brw_context *brw) unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ + brw->vb.nr_enabled = 0; while (vs_inputs) { GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -376,13 +374,13 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; @@ -466,8 +464,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -477,19 +475,7 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - - /* Accumulate the list of enabled arrays. */ - while (vs_inputs) { - i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; - - vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; - } brw_emit_query_begin(brw); @@ -499,12 +485,12 @@ static void brw_emit_vertices(struct brw_context *brw) * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -517,10 +503,10 @@ static void brw_emit_vertices(struct brw_context *brw) } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, -- cgit v1.2.3 From 9eca0e5350377148976e0d1200f98bd20ac28197 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 30 Jul 2009 13:40:29 -0700 Subject: i965: Don't emit bad packets when no VBs are referenced. It appears that sometimes Mesa (and I suppose a VS could as well) emits a program which references no vertex data, and thus we end up with nr_enabled == 0 even though some VBs are enabled. We'd end up emitting VB/VE packet headers of 0xffffffff in that case, leading to GPU hangs. Bug #22945 (wine with an uncompiled VS) (cherry picked from commit d1fbfd0f962347e4153db3852292d44de5aea863) --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c1fe85908b..e7a87b6e09 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -479,6 +479,28 @@ static void brw_emit_vertices(struct brw_context *brw) brw_emit_query_begin(brw); + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; + } + /* Now emit VB and VEP state packets. * * This still defines a hardware VB for each input, even if they -- cgit v1.2.3 From 456a16491bc757af6ba9a98e5706a78f748e9a79 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 15:24:02 -0700 Subject: i965: Make sure the VS URB size is big enough to fit a VF VUE. This fix is just from code and docs inspection, but it may fix hangs on some applications. (cherry picked from commit e93848e595176ae0bad3bfe64e0ca63fd089bb72) --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b69616d6e5..6792c3a34e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c ) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; + int attributes_in_vue; #if 0 if (c->vp->program.Base.Parameters->NumParameters >= 6) @@ -201,7 +202,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); + + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { -- cgit v1.2.3 From 217af32c2d6afab5e1907cc16fb4b6feb982abe7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 17:12:43 -0700 Subject: i965: Even if no VS inputs are set, still load some amount of URB as required. See comment on Vertex URB Entry Read Length for VS_STATE. This, combined with the previous three commits, fixes #22945. (cherry picked from commit e340d4f9866db4bae391288e83a630a310b0dd2b) --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 6792c3a34e..b95079a5d1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -124,6 +124,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; /* Allocate outputs: TODO: could organize the non-position outputs * to go straight into message regs. @@ -201,6 +206,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size * them to fit the biggest thing they need to. -- cgit v1.2.3 From 63b3fa2bcecc75a116ce651da435d205ccd43584 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 17:55:14 -0700 Subject: i965: Assert that the offset in the VBO is below the VBO size. This avoids sending a bad buffer address to the GPU due to programmer error, and is permitted by the ARB_vbo spec. Note that we still have the opportunity to dereference past the end of the GPU, because we aren't clipping to a correct _MaxElement, but that appears to be harder than it should be. This gets us the 90% solution. Bug #19911. (cherry picked from commit d7430d942f6c7950a92367aeb13b80cf76ccad78) --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index e7a87b6e09..05079c043a 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -396,6 +396,20 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous -- cgit v1.2.3 From f396263651b867be39800bfd274b1be3d78ef60f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 4 Aug 2009 12:39:22 -0700 Subject: i965: Fix RECT shadow sampling by not losing the other texcoords. Bug #20821 (cherry picked from commit 191e028de20b2f954621b652aa77b06d0e93652a) --- src/mesa/drivers/dri/i965/brw_wm_fp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 49aad281d7..8cf3a1fd13 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -705,7 +705,11 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); -- cgit v1.2.3 From 83e6c67363d77d89ab4b6e2e202c8634d582c907 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 4 Aug 2009 13:42:30 -0700 Subject: i965: Spell "conditional" correctly. --- src/mesa/drivers/dri/i965/brw_eu.c | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 26 +++++++++++++------------- src/mesa/drivers/dri/i965/brw_structs.h | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index c53efba599..1df561386e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2a147fb8c3..48243e1574 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -377,8 +377,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -746,7 +746,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -790,7 +790,7 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -826,7 +826,7 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -842,7 +842,7 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); @@ -888,7 +888,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -933,7 +933,7 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -986,7 +986,7 @@ void brw_dp_READ_4( struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ @@ -1059,7 +1059,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ @@ -1092,7 +1092,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1187,7 +1187,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1238,7 +1238,7 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(insn, allocate, diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 89e2981203..6011a4aa0a 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1168,7 +1168,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; -- cgit v1.2.3 From 3d6c73513c5918a3f833ebf402803946b78828b8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 4 Aug 2009 14:13:27 -0700 Subject: i965: Emit conditional code updates as required for GLSL VS if statements. Previously, we'd be branching based on whatever condition code happened to be laying around. (cherry picked from commit 7007f8b352763af89805f287153cb7972bff0523) --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b95079a5d1..4887cdea5b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1448,6 +1448,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { -- cgit v1.2.3 From a0b7850f1da6e367ed3380fbf0a59fa5af992546 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Jun 2009 09:23:58 -0600 Subject: i965: asst clean-ups, etc in brw_vs_emit() (cherry picked from commit fd7d764514c540987549c3ea88a2d669b0f0ea58) --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 4887cdea5b..7c09222281 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1195,15 +1195,14 @@ post_vs_emit( struct brw_vs_compile *c, */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; @@ -1394,15 +1393,15 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); -- cgit v1.2.3 From 8c764d5c34548d56e912cdc5990adffb0e8fd9f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 4 Aug 2009 18:02:31 -0700 Subject: i965: Respect CondSwizzle in OPCODE_IF. Fixes piglit glsl-vs-if-bool and progs/glsl/twoside, and will likely be useful for the looping code. Bug #18992 (cherry picked from commit 78c022acd0b37bf8b32f04313d76255255e769c1) (cherry picked from commit 63d7a2f53fb38e170f4e55f2b599e918edf2c512) --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7c09222281..72cd36a603 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1190,6 +1190,23 @@ post_vs_emit( struct brw_vs_compile *c, brw_set_src1(end_inst, brw_imm_d(offset * 16)); } +static uint32_t +get_predicate(uint32_t swizzle) +{ + switch (swizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + return BRW_PREDICATE_NORMAL; + } +} /* Emit the vertex program instructions here. */ @@ -1394,7 +1411,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth]->header.predicate_control = + get_predicate(inst->DstReg.CondSwizzle); + if_depth++; break; case OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); -- cgit v1.2.3 From 94d3b832cc5c73544c478804531e16644483d8be Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Aug 2009 20:12:15 -0700 Subject: i965: Fix source depth reg setting for FSes reading and writing to depth. For some IZ setups, we'd forget to account for the source depth register being present, so we'd both read the wrong reg, and write output depth to the wrong reg. Bug #22603. (cherry picked from commit f44916414ecd2b888c8a680d56b7467ccdff6886) --- src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ src/mesa/drivers/dri/i965/brw_wm.h | 1 + src/mesa/drivers/dri/i965/brw_wm_iz.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 8a3b7df9c7..d4e22d5939 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -202,6 +202,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -263,6 +264,7 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 295fed851b..307b5e59dd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -286,6 +286,7 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b31..7e2b1c79de 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -118,6 +118,7 @@ const struct { void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -127,7 +128,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } -- cgit v1.2.3 From a9504edee344936ef496341174d4c2d76ee0c44d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 18 Aug 2009 18:48:54 -0700 Subject: intel: Align cubemap texture height to its padding requirements. (cherry picked from commit a70e1315846cd5e8d6f2b622821ff8262fe7179d) (cherry picked from commit 29e51c3872531366570d032147abad50f8a3c1af) --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb4..3ab27c26ef 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -119,6 +119,16 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } -- cgit v1.2.3 From 1d1dfc96c48681c455e1174a60bd325b33a2784f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 16 Jul 2009 15:57:22 -0700 Subject: i965: Add missing state dependency of sf_unit on _NEW_BUFFERS. (cherry picked from commit 99174e7630676307f618c252755a20ba61ad9158) --- src/mesa/drivers/dri/i965/brw_sf_state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index c99918724b..38733c8c73 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -233,7 +233,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ + /* _NEW_BUFFERS */ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ @@ -263,6 +263,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, } /* XXX clamp max depends on AA vs. non-AA */ + /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; @@ -328,7 +329,8 @@ const struct brw_tracked_state brw_sf_unit = { .mesa = (_NEW_POLYGON | _NEW_LINE | _NEW_POINT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) -- cgit v1.2.3 From 2855ee82c6d74066e8d9e44b17b2ce3b5782110e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Jun 2009 22:03:37 -0700 Subject: intel: Update Mesa state before span setup in glReadPixels. We could have mapped the wrong set of draw buffers. Noticed while looking into a DRI2 glean ReadPixels issue. (cherry picked from commit afc981ee46791838f3cb83e11eb33938aa3efc83) --- src/mesa/drivers/dri/i965/intel_pixel_read.c | 1 + 1 file changed, 1 insertion(+) create mode 120000 src/mesa/drivers/dri/i965/intel_pixel_read.c (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c new file mode 120000 index 0000000000..cc4589f4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -0,0 +1 @@ +../intel/intel_pixel_read.c \ No newline at end of file -- cgit v1.2.3 From cf820a045f0626718ec147ebb26e31f82ec0b4fb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Jun 2009 22:12:52 -0700 Subject: intel: Also get the DRI2 front buffer when doing front buffer reading. (cherry picked from commit df70d3049a396af3601d2a1747770635a74120bb) --- src/mesa/drivers/dri/i965/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 2934414d99..81c2fdb0dc 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -22,6 +22,7 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ intel_pixel_draw.c \ + intel_pixel_read.c \ intel_state.c \ intel_swapbuffers.c \ intel_tex.c \ -- cgit v1.2.3 From 8de625c7cf639c583e8bf43acb1214010989bb64 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 8 Sep 2009 12:21:42 -0600 Subject: i965: fix incorrect test for vertex position attribute --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_draw.c | 1 + src/mesa/drivers/dri/i965/brw_draw_upload.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e52fc3f374..5cf12fb353 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -386,6 +386,8 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; /** Number of uploaded elements for this input. */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5342622a73..54b0661db8 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -185,6 +185,7 @@ static void brw_merge_inputs( struct brw_context *brw, for (i = 0; i < VERT_ATTRIB_MAX; i++) { brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; if (arrays[i]->StrideB != 0) brw->vb.info.varying |= 1 << i; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 05079c043a..fd9c3915c4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -422,7 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ - if (i == 0) { + if (input->attrib == VERT_ATTRIB_POS) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { -- cgit v1.2.3